LKML Archive on lore.kernel.org
help / color / mirror / Atom feed
* [GIT PULL] x86/build changes for v4.17
@ 2018-04-02  9:50 Ingo Molnar
  2018-04-02 21:44 ` Linus Torvalds
  0 siblings, 1 reply; 52+ messages in thread
From: Ingo Molnar @ 2018-04-02  9:50 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Thomas Gleixner, Peter Zijlstra, Andrew Morton

Linus,

Please pull the latest x86-build-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-build-for-linus

   # HEAD: d6289f36aa7d5893d091a7a0c67eee7798719f03 x86/build: Don't pass in -D__KERNEL__ multiple times

The biggest change is the forcing of asm-goto support on x86, which effectively 
increases the GCC minimum supported version to gcc-4.5 (on x86).

There's also some Makefile and linker script cleanups.


  out-of-topic modifications in x86-build-for-linus:
  ----------------------------------------------------
  Makefile                           # e501ce957a78: x86: Force asm-goto

 Thanks,

	Ingo

------------------>
Cao jin (2):
      x86/build: Drop superfluous ALIGN from the linker script
      x86/build: Don't pass in -D__KERNEL__ multiple times

Peter Zijlstra (2):
      x86: Force asm-goto
      x86: Remove FAST_FEATURE_TESTS


 Makefile                          | 13 +++++++------
 arch/x86/Kconfig                  | 11 -----------
 arch/x86/Makefile                 |  7 +++++--
 arch/x86/boot/compressed/Makefile |  2 +-
 arch/x86/include/asm/cpufeature.h |  8 --------
 arch/x86/kernel/vmlinux.lds.S     |  7 +++----
 6 files changed, 16 insertions(+), 32 deletions(-)

diff --git a/Makefile b/Makefile
index d65e2e229017..6fb6fd28a124 100644
--- a/Makefile
+++ b/Makefile
@@ -494,6 +494,13 @@ RETPOLINE_CFLAGS_CLANG := -mretpoline-external-thunk
 RETPOLINE_CFLAGS := $(call cc-option,$(RETPOLINE_CFLAGS_GCC),$(call cc-option,$(RETPOLINE_CFLAGS_CLANG)))
 export RETPOLINE_CFLAGS
 
+# check for 'asm goto'
+ifeq ($(call shell-cached,$(CONFIG_SHELL) $(srctree)/scripts/gcc-goto.sh $(CC) $(KBUILD_CFLAGS)), y)
+  CC_HAVE_ASM_GOTO := 1
+  KBUILD_CFLAGS += -DCC_HAVE_ASM_GOTO
+  KBUILD_AFLAGS += -DCC_HAVE_ASM_GOTO
+endif
+
 ifeq ($(config-targets),1)
 # ===========================================================================
 # *config targets only - make sure prerequisites are updated, and descend
@@ -658,12 +665,6 @@ KBUILD_CFLAGS += $(call cc-ifversion, -lt, 0409, \
 # Tell gcc to never replace conditional load with a non-conditional one
 KBUILD_CFLAGS	+= $(call cc-option,--param=allow-store-data-races=0)
 
-# check for 'asm goto'
-ifeq ($(call shell-cached,$(CONFIG_SHELL) $(srctree)/scripts/gcc-goto.sh $(CC) $(KBUILD_CFLAGS)), y)
-	KBUILD_CFLAGS += -DCC_HAVE_ASM_GOTO
-	KBUILD_AFLAGS += -DCC_HAVE_ASM_GOTO
-endif
-
 include scripts/Makefile.kcov
 include scripts/Makefile.gcc-plugins
 
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 0fa71a78ec99..cb5b5907dbd6 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -393,17 +393,6 @@ config X86_FEATURE_NAMES
 
 	  If in doubt, say Y.
 
-config X86_FAST_FEATURE_TESTS
-	bool "Fast CPU feature tests" if EMBEDDED
-	default y
-	---help---
-	  Some fast-paths in the kernel depend on the capabilities of the CPU.
-	  Say Y here for the kernel to patch in the appropriate code at runtime
-	  based on the capabilities of the CPU. The infrastructure for patching
-	  code at runtime takes up some additional space; space-constrained
-	  embedded systems may wish to say N here to produce smaller, slightly
-	  slower code.
-
 config X86_X2APIC
 	bool "Support x2apic"
 	depends on X86_LOCAL_APIC && X86_64 && (IRQ_REMAP || HYPERVISOR_GUEST)
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 498c1b812300..a517852dad55 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -31,8 +31,7 @@ endif
 CODE16GCC_CFLAGS := -m32 -Wa,$(srctree)/arch/x86/boot/code16gcc.h
 M16_CFLAGS	 := $(call cc-option, -m16, $(CODE16GCC_CFLAGS))
 
-REALMODE_CFLAGS	:= $(M16_CFLAGS) -g -Os -D__KERNEL__ \
-		   -DDISABLE_BRANCH_PROFILING \
+REALMODE_CFLAGS	:= $(M16_CFLAGS) -g -Os -DDISABLE_BRANCH_PROFILING \
 		   -Wall -Wstrict-prototypes -march=i386 -mregparm=3 \
 		   -fno-strict-aliasing -fomit-frame-pointer -fno-pic \
 		   -mno-mmx -mno-sse
@@ -181,6 +180,10 @@ ifdef CONFIG_FUNCTION_GRAPH_TRACER
   endif
 endif
 
+ifndef CC_HAVE_ASM_GOTO
+  $(error Compiler lacks asm-goto support.)
+endif
+
 #
 # Jump labels need '-maccumulate-outgoing-args' for gcc < 4.5.2 to prevent a
 # GCC bug (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=46226).  There's no way
diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index f25e1530e064..f484ae0ece93 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -26,7 +26,7 @@ KCOV_INSTRUMENT		:= n
 targets := vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 vmlinux.bin.lzma \
 	vmlinux.bin.xz vmlinux.bin.lzo vmlinux.bin.lz4
 
-KBUILD_CFLAGS := -m$(BITS) -D__KERNEL__ -O2
+KBUILD_CFLAGS := -m$(BITS) -O2
 KBUILD_CFLAGS += -fno-strict-aliasing $(call cc-option, -fPIE, -fPIC)
 KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING
 cflags-$(CONFIG_X86_32) := -march=i386
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 736771c9822e..b27da9602a6d 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -140,7 +140,6 @@ extern void clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int bit);
 
 #define setup_force_cpu_bug(bit) setup_force_cpu_cap(bit)
 
-#if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_X86_FAST_FEATURE_TESTS)
 /*
  * Static testing of CPU features.  Used the same as boot_cpu_has().
  * These will statically patch the target code for additional
@@ -196,13 +195,6 @@ static __always_inline __pure bool _static_cpu_has(u16 bit)
 		boot_cpu_has(bit) :				\
 		_static_cpu_has(bit)				\
 )
-#else
-/*
- * Fall back to dynamic for gcc versions which don't support asm goto. Should be
- * a minority now anyway.
- */
-#define static_cpu_has(bit)		boot_cpu_has(bit)
-#endif
 
 #define cpu_has_bug(c, bit)		cpu_has(c, (bit))
 #define set_cpu_bug(c, bit)		set_cpu_cap(c, (bit))
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index b854ebf5851b..795f3a80e576 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -102,7 +102,6 @@ SECTIONS
 		_stext = .;
 		/* bootstrapping code */
 		HEAD_TEXT
-		. = ALIGN(8);
 		TEXT_TEXT
 		SCHED_TEXT
 		CPUIDLE_TEXT
@@ -200,7 +199,7 @@ SECTIONS
 		. = __vvar_beginning_hack + PAGE_SIZE;
 	} :data
 
-       . = ALIGN(__vvar_page + PAGE_SIZE, PAGE_SIZE);
+	. = ALIGN(__vvar_page + PAGE_SIZE, PAGE_SIZE);
 
 	/* Init code and data - will be freed after init */
 	. = ALIGN(PAGE_SIZE);
@@ -368,8 +367,8 @@ SECTIONS
 	. = ALIGN(PAGE_SIZE);		/* keep VO_INIT_SIZE page aligned */
 	_end = .;
 
-        STABS_DEBUG
-        DWARF_DEBUG
+	STABS_DEBUG
+	DWARF_DEBUG
 
 	/* Sections to be discarded */
 	DISCARDS

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [GIT PULL] x86/build changes for v4.17
  2018-04-02  9:50 [GIT PULL] x86/build changes for v4.17 Ingo Molnar
@ 2018-04-02 21:44 ` Linus Torvalds
  2018-04-02 22:38   ` Matthias Kaehlcke
  2018-04-03  8:59   ` Peter Zijlstra
  0 siblings, 2 replies; 52+ messages in thread
From: Linus Torvalds @ 2018-04-02 21:44 UTC (permalink / raw)
  To: Ingo Molnar, Matthias Kaehlcke
  Cc: Linux Kernel Mailing List, Thomas Gleixner, Peter Zijlstra,
	Andrew Morton

On Mon, Apr 2, 2018 at 2:50 AM, Ingo Molnar <mingo@kernel.org> wrote:
>
> The biggest change is the forcing of asm-goto support on x86, which effectively
> increases the GCC minimum supported version to gcc-4.5 (on x86).

So my biggest worry isn't gcc-4.5 (anybody who hasn't updated deserves
to be forced, or can stay with old kernels).

No, my biggest worry is clang. What's the status there?

I've pulled this, and honestly, the disaster with
-fmerge-all-constants makes me think that clang isn't that good a
compiler choice anyway, but it's sad if this undoes a lot of clang
work just because of the worries about Spectre and mis-speculated
branches.

             Linus

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [GIT PULL] x86/build changes for v4.17
  2018-04-02 21:44 ` Linus Torvalds
@ 2018-04-02 22:38   ` Matthias Kaehlcke
  2018-04-03  1:26     ` Matthias Kaehlcke
  2018-04-03  8:59   ` Peter Zijlstra
  1 sibling, 1 reply; 52+ messages in thread
From: Matthias Kaehlcke @ 2018-04-02 22:38 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: Ingo Molnar, Linux Kernel Mailing List, Thomas Gleixner,
	Peter Zijlstra, Andrew Morton, James Y Knight, Chandler Carruth,
	Stephen Hines, Nick Desaulniers, Kees Cook, Greg Kroah-Hartman

El Mon, Apr 02, 2018 at 02:44:48PM -0700 Linus Torvalds ha dit:

> On Mon, Apr 2, 2018 at 2:50 AM, Ingo Molnar <mingo@kernel.org> wrote:
> >
> > The biggest change is the forcing of asm-goto support on x86, which effectively
> > increases the GCC minimum supported version to gcc-4.5 (on x86).
> 
> So my biggest worry isn't gcc-4.5 (anybody who hasn't updated deserves
> to be forced, or can stay with old kernels).
> 
> No, my biggest worry is clang. What's the status there?

I know there is work in progress for asm-goto in clang, but I don't
know the details or an ETA. Some folks in cc might have more
information.

> I've pulled this, and honestly, the disaster with
> -fmerge-all-constants makes me think that clang isn't that good a
> compiler choice anyway, but it's sad if this undoes a lot of clang
> work just because of the worries about Spectre and mis-speculated
> branches.

It would indeed be very unfortunate to loose clang support again, now
that it just got added after years of joint efforts from different
people. And this wasn't exclusively kernel work, in my experience over
the past year the LLVM community was very open to adopt/implement
changes needed to build the kernel without ugly hacks. It's still not
a perfect world, but I think LLVM folks deserve some credit.

Couldn't we just raise the minimum gcc version without enforcing
asm-goto for clang (yet)? This would give almost everybody the desired
extra protection, and give clang some slack to implement asm goto.

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [GIT PULL] x86/build changes for v4.17
  2018-04-02 22:38   ` Matthias Kaehlcke
@ 2018-04-03  1:26     ` Matthias Kaehlcke
  0 siblings, 0 replies; 52+ messages in thread
From: Matthias Kaehlcke @ 2018-04-03  1:26 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: Ingo Molnar, Linux Kernel Mailing List, Thomas Gleixner,
	Peter Zijlstra, Andrew Morton, James Y Knight, Chandler Carruth,
	Stephen Hines, Nick Desaulniers, Kees Cook, Greg Kroah-Hartman

El Mon, Apr 02, 2018 at 03:38:21PM -0700 Matthias Kaehlcke ha dit:

> El Mon, Apr 02, 2018 at 02:44:48PM -0700 Linus Torvalds ha dit:
> 
> > On Mon, Apr 2, 2018 at 2:50 AM, Ingo Molnar <mingo@kernel.org> wrote:
> > >
> > > The biggest change is the forcing of asm-goto support on x86, which effectively
> > > increases the GCC minimum supported version to gcc-4.5 (on x86).
> > 
> > So my biggest worry isn't gcc-4.5 (anybody who hasn't updated deserves
> > to be forced, or can stay with old kernels).
> > 
> > No, my biggest worry is clang. What's the status there?
> 
> I know there is work in progress for asm-goto in clang, but I don't
> know the details or an ETA. Some folks in cc might have more
> information.

Forwarding Chandler Carruth's words:

"A number of folks from both Kernel and LLVM communities are looking at
how to implement asm-goto in Clang in a way that should both work for
the compiler and for the Kernel. So there is progress here, it isn't
just everyone sitting around and waiting. Having more time to finish
it would be appreciated as it isn't easy to implement."

> > I've pulled this, and honestly, the disaster with
> > -fmerge-all-constants makes me think that clang isn't that good a
> > compiler choice anyway, but it's sad if this undoes a lot of clang
> > work just because of the worries about Spectre and mis-speculated
> > branches.
> 
> It would indeed be very unfortunate to loose clang support again, now
> that it just got added after years of joint efforts from different
> people. And this wasn't exclusively kernel work, in my experience over
> the past year the LLVM community was very open to adopt/implement
> changes needed to build the kernel without ugly hacks. It's still not
> a perfect world, but I think LLVM folks deserve some credit.
> 
> Couldn't we just raise the minimum gcc version without enforcing
> asm-goto for clang (yet)? This would give almost everybody the desired
> extra protection, and give clang some slack to implement asm goto.

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [GIT PULL] x86/build changes for v4.17
  2018-04-02 21:44 ` Linus Torvalds
  2018-04-02 22:38   ` Matthias Kaehlcke
@ 2018-04-03  8:59   ` Peter Zijlstra
  2018-04-03  9:51     ` Ingo Molnar
  2018-04-03 17:36     ` Linus Torvalds
  1 sibling, 2 replies; 52+ messages in thread
From: Peter Zijlstra @ 2018-04-03  8:59 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: Ingo Molnar, Matthias Kaehlcke, Linux Kernel Mailing List,
	Thomas Gleixner, Andrew Morton

On Mon, Apr 02, 2018 at 02:44:48PM -0700, Linus Torvalds wrote:
> On Mon, Apr 2, 2018 at 2:50 AM, Ingo Molnar <mingo@kernel.org> wrote:
> >
> > The biggest change is the forcing of asm-goto support on x86, which effectively
> > increases the GCC minimum supported version to gcc-4.5 (on x86).
> 
> So my biggest worry isn't gcc-4.5 (anybody who hasn't updated deserves
> to be forced, or can stay with old kernels).
> 
> No, my biggest worry is clang. What's the status there?
> 
> I've pulled this, and honestly, the disaster with
> -fmerge-all-constants makes me think that clang isn't that good a
> compiler choice anyway, but it's sad if this undoes a lot of clang
> work just because of the worries about Spectre and mis-speculated
> branches.

It's not just spectre, I believe you yourself wanted to use asm-goto
somewhere in the x86 code:

  http://lkml.kernel.org/r/CA+55aFyCp-9Qqjcn9wp=VDp2KO7tfYuUMJxVKC75Xxu0wEB5Cw@mail.gmail.com

There was some KVM talk of relying on it here:

  http://lkml.kernel.org/r/6a5f2453-cf51-d491-db54-5f239caa29bc@redhat.com

And there's the comment here:

  https://elixir.bootlin.com/linux/v4.16-rc7/source/arch/x86/kvm/emulate.c#L457

As to the suitablility of using clang, there's also this unresolved
issue:

  http://lkml.kernel.org/r/20180321211931.GA111711@google.com

The fact that even without asm-goto they cannot correctly compile a
kernel and have sat on their hands regarding asm-goto for the past 7 odd
years makes me care very little.

And since they need to spin a new version of the compiler with all the
various bugs fixed, they might as well include asm-goto in that and be
done with it.

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [GIT PULL] x86/build changes for v4.17
  2018-04-03  8:59   ` Peter Zijlstra
@ 2018-04-03  9:51     ` Ingo Molnar
  2018-04-03 12:09       ` Peter Zijlstra
  2018-04-03 18:06       ` Matthias Kaehlcke
  2018-04-03 17:36     ` Linus Torvalds
  1 sibling, 2 replies; 52+ messages in thread
From: Ingo Molnar @ 2018-04-03  9:51 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Linus Torvalds, Matthias Kaehlcke, Linux Kernel Mailing List,
	Thomas Gleixner, Andrew Morton


* Peter Zijlstra <peterz@infradead.org> wrote:

> On Mon, Apr 02, 2018 at 02:44:48PM -0700, Linus Torvalds wrote:
> > On Mon, Apr 2, 2018 at 2:50 AM, Ingo Molnar <mingo@kernel.org> wrote:
> > >
> > > The biggest change is the forcing of asm-goto support on x86, which effectively
> > > increases the GCC minimum supported version to gcc-4.5 (on x86).
> > 
> > So my biggest worry isn't gcc-4.5 (anybody who hasn't updated deserves
> > to be forced, or can stay with old kernels).
> > 
> > No, my biggest worry is clang. What's the status there?
> > 
> > I've pulled this, and honestly, the disaster with
> > -fmerge-all-constants makes me think that clang isn't that good a
> > compiler choice anyway, but it's sad if this undoes a lot of clang
> > work just because of the worries about Spectre and mis-speculated
> > branches.
> 
> It's not just spectre, I believe you yourself wanted to use asm-goto
> somewhere in the x86 code:
> 
>   http://lkml.kernel.org/r/CA+55aFyCp-9Qqjcn9wp=VDp2KO7tfYuUMJxVKC75Xxu0wEB5Cw@mail.gmail.com
> 
> There was some KVM talk of relying on it here:
> 
>   http://lkml.kernel.org/r/6a5f2453-cf51-d491-db54-5f239caa29bc@redhat.com
> 
> And there's the comment here:
> 
>   https://elixir.bootlin.com/linux/v4.16-rc7/source/arch/x86/kvm/emulate.c#L457
> 
> As to the suitablility of using clang, there's also this unresolved
> issue:
> 
>   http://lkml.kernel.org/r/20180321211931.GA111711@google.com
> 
> The fact that even without asm-goto they cannot correctly compile a
> kernel and have sat on their hands regarding asm-goto for the past 7 odd
> years makes me care very little.
> 
> And since they need to spin a new version of the compiler with all the
> various bugs fixed, they might as well include asm-goto in that and be
> done with it.

So there's really two questions here:

 - This asm-goto change only impacts x86, is there any production x86 kernel being
   built with Clang? I think the Pixel kernel is built with Clang, but that's ARM.

 - Is there anyone on the Clang side _actually_ bending metal and working on 
   asm-goto support, with something like very early alpha test patches available, 
   etc.? Last I saw the communicated Clang POV was still that they wanted to do 
   something "better" (and incompatible to ...) asm-goto. Has this changed?

If it's being relied on, or if there's actually something firmly planned,
which we could track, then I'd have no problem with reverting this change
and waiting one more kernel cycle or so.

Thanks,

	Ingo

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [GIT PULL] x86/build changes for v4.17
  2018-04-03  9:51     ` Ingo Molnar
@ 2018-04-03 12:09       ` Peter Zijlstra
  2018-04-03 18:06       ` Matthias Kaehlcke
  1 sibling, 0 replies; 52+ messages in thread
From: Peter Zijlstra @ 2018-04-03 12:09 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Linus Torvalds, Matthias Kaehlcke, Linux Kernel Mailing List,
	Thomas Gleixner, Andrew Morton

On Tue, Apr 03, 2018 at 11:51:18AM +0200, Ingo Molnar wrote:
> If it's being relied on, or if there's actually something firmly planned,
> which we could track, then I'd have no problem with reverting this change
> and waiting one more kernel cycle or so.

I don't see why the clang people can't go back to using out of tree
patches to make their compile 'work' until they've caught up with 7-year
old tech.

Also, I'd really like to know if there are any plans to support
asm-cc-output, otherwise we'll have this same old drama all over again
in a few years :/

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [GIT PULL] x86/build changes for v4.17
  2018-04-03  8:59   ` Peter Zijlstra
  2018-04-03  9:51     ` Ingo Molnar
@ 2018-04-03 17:36     ` Linus Torvalds
  1 sibling, 0 replies; 52+ messages in thread
From: Linus Torvalds @ 2018-04-03 17:36 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Ingo Molnar, Matthias Kaehlcke, Linux Kernel Mailing List,
	Thomas Gleixner, Andrew Morton

On Tue, Apr 3, 2018 at 1:59 AM, Peter Zijlstra <peterz@infradead.org> wrote:
>
> It's not just spectre, I believe you yourself wanted to use asm-goto
> somewhere in the x86 code:

Absolutely. But I don't want to make it impossible for clang people to
get their work done.

                Linus

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [GIT PULL] x86/build changes for v4.17
  2018-04-03  9:51     ` Ingo Molnar
  2018-04-03 12:09       ` Peter Zijlstra
@ 2018-04-03 18:06       ` Matthias Kaehlcke
  2018-04-03 21:58         ` Nick Desaulniers
  2018-04-04  9:30         ` Peter Zijlstra
  1 sibling, 2 replies; 52+ messages in thread
From: Matthias Kaehlcke @ 2018-04-03 18:06 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Peter Zijlstra, Linus Torvalds, Linux Kernel Mailing List,
	Thomas Gleixner, Andrew Morton, James Y Knight, Chandler Carruth,
	Stephen Hines, Nick Desaulniers, Kees Cook, Guenter Roeck,
	Greg Hackmann, Greg Kroah-Hartman

El Tue, Apr 03, 2018 at 11:51:18AM +0200 Ingo Molnar ha dit:

> 
> * Peter Zijlstra <peterz@infradead.org> wrote:
> 
> > On Mon, Apr 02, 2018 at 02:44:48PM -0700, Linus Torvalds wrote:
> > > On Mon, Apr 2, 2018 at 2:50 AM, Ingo Molnar <mingo@kernel.org> wrote:
> > > >
> > > > The biggest change is the forcing of asm-goto support on x86, which effectively
> > > > increases the GCC minimum supported version to gcc-4.5 (on x86).
> > > 
> > > So my biggest worry isn't gcc-4.5 (anybody who hasn't updated deserves
> > > to be forced, or can stay with old kernels).
> > > 
> > > No, my biggest worry is clang. What's the status there?
> > > 
> > > I've pulled this, and honestly, the disaster with
> > > -fmerge-all-constants makes me think that clang isn't that good a
> > > compiler choice anyway, but it's sad if this undoes a lot of clang
> > > work just because of the worries about Spectre and mis-speculated
> > > branches.
> > 
> > It's not just spectre, I believe you yourself wanted to use asm-goto
> > somewhere in the x86 code:
> > 
> >   http://lkml.kernel.org/r/CA+55aFyCp-9Qqjcn9wp=VDp2KO7tfYuUMJxVKC75Xxu0wEB5Cw@mail.gmail.com
> > 
> > There was some KVM talk of relying on it here:
> > 
> >   http://lkml.kernel.org/r/6a5f2453-cf51-d491-db54-5f239caa29bc@redhat.com
> > 
> > And there's the comment here:
> > 
> >   https://elixir.bootlin.com/linux/v4.16-rc7/source/arch/x86/kvm/emulate.c#L457
> > 
> > As to the suitablility of using clang, there's also this unresolved
> > issue:
> > 
> >   http://lkml.kernel.org/r/20180321211931.GA111711@google.com
> > 
> > The fact that even without asm-goto they cannot correctly compile a
> > kernel and have sat on their hands regarding asm-goto for the past 7 odd
> > years makes me care very little.
> > 
> > And since they need to spin a new version of the compiler with all the
> > various bugs fixed, they might as well include asm-goto in that and be
> > done with it.
> 
> So there's really two questions here:
> 
>  - This asm-goto change only impacts x86, is there any production x86 kernel being
>    built with Clang? I think the Pixel kernel is built with Clang, but that's ARM.

Yes, Chrome OS R67 (currently dev, soon beta) will ship a kernel built
with Clang for multiple x86 Chromebooks.

IIRC the kernel of the Android emulator is also built with Clang.

>  - Is there anyone on the Clang side _actually_ bending metal and working on 
>    asm-goto support, with something like very early alpha test patches available, 
>    etc.? Last I saw the communicated Clang POV was still that they wanted to do 
>    something "better" (and incompatible to ...) asm-goto. Has this changed?
> 
> If it's being relied on, or if there's actually something firmly planned,
> which we could track, then I'd have no problem with reverting this change
> and waiting one more kernel cycle or so.

It is actively been worked on, but AFAIK there are no public patches
available at this point.

>From Chandler Carruth who is involved in this effort:

  A number of folks from both Kernel and LLVM communities are looking at
  how to implement asm-goto in Clang in a way that should both work for
  the compiler and for the Kernel. So there is progress here, it isn't
  just everyone sitting around and waiting. Having more time to finish
  it would be appreciated as it isn't easy to implement.

Both Chrome OS and Android are interested in an upstream kernel that
builds with Clang, and we have compiler folks supporting us on the
Clang side. We ship devices with Clang built kernels and plan to do so
for future devices, so I think I can say we are committed to make this
work.

Given that it takes time for distributions to roll out new compiler
versions I would like to ask for a longer period of 'exemption' from
asm-goto for Clang, at least if it isn't an actual burden for the
kernel, like preventing important features from being added. An ideal
time would be after the next-next LTS version, if this is considered
too far out, after the next LTS version would be the second best time
IMO. Let me be clear, this is *not* to delay the implementation of
asm-goto, but to facilitate the use of Clang-built kernels by other
projects and distributions, as well as automated builds of upstream
kernels with Clang, without requiring necessarily the very latest
version of Clang or extra patches.

Thanks

Matthias

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [GIT PULL] x86/build changes for v4.17
  2018-04-03 18:06       ` Matthias Kaehlcke
@ 2018-04-03 21:58         ` Nick Desaulniers
  2018-04-04  9:19           ` Peter Zijlstra
  2018-04-04  9:38           ` Greg KH
  2018-04-04  9:30         ` Peter Zijlstra
  1 sibling, 2 replies; 52+ messages in thread
From: Nick Desaulniers @ 2018-04-03 21:58 UTC (permalink / raw)
  To: Matthias Kaehlcke
  Cc: Ingo Molnar, Peter Zijlstra, Linus Torvalds, LKML,
	Thomas Gleixner, Andrew Morton, James Y Knight, Chandler Carruth,
	Stephen Hines, Kees Cook, groeck, Greg Hackmann, Greg KH

Speaking more with our internal LLVM teams, there ARE a few different
approaches to implementing asm-goto in LLVM proposed, by external parties
to Google.  These proposals haven't progressed to code review, so we've
asked our LLVM teams to reignite these discussions with increased priority,
if not implement the feature outright.  We (Google kernel AND llvm hackers)
are committed to supporting the Linux kernel being built with Clang.

I can see both sides where eventually a long-requested feature-request
should come to a head, especially with good evidence (
https://lkml.org/lkml/2018/2/14/895), but just as you wouldn't accept a
patch that doesn't compile with GCC, I'd like to request that we don't
merge patches that fail to compile with Clang (or at least start to think
what that might look like).  I realize that would increase the burden on
patch authors and maintainers, so I'm interested in better approaches or
ideas.

I've been in contact with the 0-day bot maintainers, kernel-ci maintainers,
and even run my own run down version of 0-day bot on my workstation
hourly.  I think those will help reduce the burden of testing patches with
multiple different compilers.
--
Thanks,
~Nick Desaulniers

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [GIT PULL] x86/build changes for v4.17
  2018-04-03 21:58         ` Nick Desaulniers
@ 2018-04-04  9:19           ` Peter Zijlstra
  2018-04-04  9:38           ` Greg KH
  1 sibling, 0 replies; 52+ messages in thread
From: Peter Zijlstra @ 2018-04-04  9:19 UTC (permalink / raw)
  To: Nick Desaulniers
  Cc: Matthias Kaehlcke, Ingo Molnar, Linus Torvalds, LKML,
	Thomas Gleixner, Andrew Morton, James Y Knight, Chandler Carruth,
	Stephen Hines, Kees Cook, groeck, Greg Hackmann, Greg KH

On Tue, Apr 03, 2018 at 09:58:03PM +0000, Nick Desaulniers wrote:
> Speaking more with our internal LLVM teams, there ARE a few different
> approaches to implementing asm-goto in LLVM proposed, by external parties
> to Google.  These proposals haven't progressed to code review, so we've
> asked our LLVM teams to reignite these discussions with increased priority,
> if not implement the feature outright.  We (Google kernel AND llvm hackers)
> are committed to supporting the Linux kernel being built with Clang.
> 
> I can see both sides where eventually a long-requested feature-request
> should come to a head, especially with good evidence (
> https://lkml.org/lkml/2018/2/14/895), but just as you wouldn't accept a
> patch that doesn't compile with GCC, I'd like to request that we don't
> merge patches that fail to compile with Clang (or at least start to think
> what that might look like).

Again, I ask what the plans are for asm-cc-output, hard depending on
that is a few years out I imagine, but if you don't promise feature
parity for all the features we use, I can see this all happening again.

Also, it would be good to get input on the whole memory model situation;
esp. with people looking to do LTO builds, the C/C++ memory model can
cause us quite some grief, for specifics I feel we should start a new
thread. But this is another issue that's been raised several times
without feedback.

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [GIT PULL] x86/build changes for v4.17
  2018-04-03 18:06       ` Matthias Kaehlcke
  2018-04-03 21:58         ` Nick Desaulniers
@ 2018-04-04  9:30         ` Peter Zijlstra
  2018-04-04 19:17           ` Matthias Kaehlcke
  1 sibling, 1 reply; 52+ messages in thread
From: Peter Zijlstra @ 2018-04-04  9:30 UTC (permalink / raw)
  To: Matthias Kaehlcke
  Cc: Ingo Molnar, Linus Torvalds, Linux Kernel Mailing List,
	Thomas Gleixner, Andrew Morton, James Y Knight, Chandler Carruth,
	Stephen Hines, Nick Desaulniers, Kees Cook, Guenter Roeck,
	Greg Hackmann, Greg Kroah-Hartman

On Tue, Apr 03, 2018 at 11:06:58AM -0700, Matthias Kaehlcke wrote:

> Yes, Chrome OS R67 (currently dev, soon beta) will ship a kernel built
> with Clang for multiple x86 Chromebooks.

But there are still _known_ miscompilations....

> Given that it takes time for distributions to roll out new compiler
> versions I would like to ask for a longer period of 'exemption' from
> asm-goto for Clang, at least if it isn't an actual burden for the
> kernel, like preventing important features from being added. An ideal
> time would be after the next-next LTS version, if this is considered
> too far out, after the next LTS version would be the second best time
> IMO. Let me be clear, this is *not* to delay the implementation of
> asm-goto, but to facilitate the use of Clang-built kernels by other
> projects and distributions, as well as automated builds of upstream
> kernels with Clang, without requiring necessarily the very latest
> version of Clang or extra patches.

I don't think that's sane or realistic, given that the very latest clang
is _known_ to miscompile the kernel. How can you want to support older
compilers that are therefore also known to not work correctly.

Next LTS is still a fair way out, if we take LTS release to be
every ~5 releases, the next one would be ~.19, that's still 3 releases
hence. That's a _long_ time.

I don't see the point in waiting that long for a compiler that doesn't
work even without asm-goto.

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [GIT PULL] x86/build changes for v4.17
  2018-04-03 21:58         ` Nick Desaulniers
  2018-04-04  9:19           ` Peter Zijlstra
@ 2018-04-04  9:38           ` Greg KH
  2018-04-04 16:49             ` Nick Desaulniers
  2018-04-04 16:53             ` Nick Desaulniers
  1 sibling, 2 replies; 52+ messages in thread
From: Greg KH @ 2018-04-04  9:38 UTC (permalink / raw)
  To: Nick Desaulniers
  Cc: Matthias Kaehlcke, Ingo Molnar, Peter Zijlstra, Linus Torvalds,
	LKML, Thomas Gleixner, Andrew Morton, James Y Knight,
	Chandler Carruth, Stephen Hines, Kees Cook, groeck,
	Greg Hackmann

On Tue, Apr 03, 2018 at 09:58:03PM +0000, Nick Desaulniers wrote:
> Speaking more with our internal LLVM teams, there ARE a few different
> approaches to implementing asm-goto in LLVM proposed, by external parties
> to Google.  These proposals haven't progressed to code review, so we've
> asked our LLVM teams to reignite these discussions with increased priority,
> if not implement the feature outright.  We (Google kernel AND llvm hackers)
> are committed to supporting the Linux kernel being built with Clang.
> 
> I can see both sides where eventually a long-requested feature-request
> should come to a head, especially with good evidence (
> https://lkml.org/lkml/2018/2/14/895), but just as you wouldn't accept a
> patch that doesn't compile with GCC, I'd like to request that we don't
> merge patches that fail to compile with Clang (or at least start to think
> what that might look like).  I realize that would increase the burden on
> patch authors and maintainers, so I'm interested in better approaches or
> ideas.
> 
> I've been in contact with the 0-day bot maintainers, kernel-ci maintainers,
> and even run my own run down version of 0-day bot on my workstation
> hourly.  I think those will help reduce the burden of testing patches with
> multiple different compilers.

There are known-bugs with building a kernel with clang right now (I
pointed one out a few days ago about NULL checks being deleted from the
clang output for no good reason, which really is scary for obvious
reasons).  So while it is great that small subsets of the kernel can
work properly (or hopefully properly), with clang, it still isn't ready
to be considered a "fully supported and we can't change the kernel if we
break using it" option, sorry.

And don't tie _anything_ to a LTS kernel, that's exactly what those
kernels are NOT for.  You implement features and things in the kernel
when they are ready, and I'll pick a random LTS kernel out of the air
when I feel like it.  Never should the two intersect and matter.

So please, work on fixing up clang for asm-goto and other "features"
that the kernel requires, and maybe when all build options/configs are
really solid and working well, will we be able to properly consider it
as a reason to implement, or not implement, something in the kernel
source.

thanks,

greg k-h

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [GIT PULL] x86/build changes for v4.17
  2018-04-04  9:38           ` Greg KH
@ 2018-04-04 16:49             ` Nick Desaulniers
  2018-04-04 17:13               ` Linus Torvalds
  2018-04-04 16:53             ` Nick Desaulniers
  1 sibling, 1 reply; 52+ messages in thread
From: Nick Desaulniers @ 2018-04-04 16:49 UTC (permalink / raw)
  To: Greg KH
  Cc: Matthias Kaehlcke, Ingo Molnar, Peter Zijlstra, Linus Torvalds,
	LKML, Thomas Gleixner, Andrew Morton, James Y Knight,
	Chandler Carruth, Stephen Hines, Kees Cook, groeck,
	Greg Hackmann

[-- Attachment #1: Type: text/plain, Size: 1408 bytes --]

On Wed, Apr 4, 2018 at 2:38 AM Greg KH <gregkh@linuxfoundation.org> wrote:

> There are known-bugs with building a kernel with clang right now (I
> pointed one out a few days ago about NULL checks being deleted from the
> clang output for no good reason, which really is scary for obvious
> reasons).


Is this the thread you are referring to?
https://lkml.org/lkml/2018/3/27/1286

It's definitely something curious that I'll need to sit down and
investigate more.  If there are other known instances, it would be good to
let me know.


> So while it is great that small subsets of the kernel can
> work properly (or hopefully properly), with clang, it still isn't ready
> to be considered a "fully supported and we can't change the kernel if we
> break using it" option, sorry.


> And don't tie _anything_ to a LTS kernel, that's exactly what those
> kernels are NOT for.  You implement features and things in the kernel
> when they are ready, and I'll pick a random LTS kernel out of the air
> when I feel like it.  Never should the two intersect and matter.
>
> So please, work on fixing up clang for asm-goto and other "features"
> that the kernel requires, and maybe when all build options/configs are
> really solid and working well, will we be able to properly consider it
> as a reason to implement, or not implement, something in the kernel
> source.
>

Acknowledged.
-- 
Thanks,
~Nick Desaulniers

[-- Attachment #2: Type: text/html, Size: 2680 bytes --]

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [GIT PULL] x86/build changes for v4.17
  2018-04-04  9:38           ` Greg KH
  2018-04-04 16:49             ` Nick Desaulniers
@ 2018-04-04 16:53             ` Nick Desaulniers
  2018-04-04 16:59               ` Greg KH
  2018-04-04 19:32               ` Josh Poimboeuf
  1 sibling, 2 replies; 52+ messages in thread
From: Nick Desaulniers @ 2018-04-04 16:53 UTC (permalink / raw)
  To: Greg KH
  Cc: Matthias Kaehlcke, Ingo Molnar, Peter Zijlstra, Linus Torvalds,
	LKML, Thomas Gleixner, Andrew Morton, James Y Knight,
	Chandler Carruth, Stephen Hines, Kees Cook, groeck,
	Greg Hackmann

(re-sending as plain text)

On Wed, Apr 4, 2018 at 2:38 AM Greg KH <gregkh@linuxfoundation.org> wrote:
> There are known-bugs with building a kernel with clang right now (I
> pointed one out a few days ago about NULL checks being deleted from the
> clang output for no good reason, which really is scary for obvious
> reasons).

Is this the thread you are referring to?
https://lkml.org/lkml/2018/3/27/1286

It's definitely something curious that I'll need to sit down and
investigate more.  If there are other known instances, it would be good to
let me know.

> So while it is great that small subsets of the kernel can
> work properly (or hopefully properly), with clang, it still isn't ready
> to be considered a "fully supported and we can't change the kernel if we
> break using it" option, sorry.

> And don't tie _anything_ to a LTS kernel, that's exactly what those
> kernels are NOT for.  You implement features and things in the kernel
> when they are ready, and I'll pick a random LTS kernel out of the air
> when I feel like it.  Never should the two intersect and matter.

> So please, work on fixing up clang for asm-goto and other "features"
> that the kernel requires, and maybe when all build options/configs are
> really solid and working well, will we be able to properly consider it
> as a reason to implement, or not implement, something in the kernel
> source.

Acknowledged.
-- 
Thanks,
~Nick Desaulniers

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [GIT PULL] x86/build changes for v4.17
  2018-04-04 16:53             ` Nick Desaulniers
@ 2018-04-04 16:59               ` Greg KH
  2018-04-04 19:26                 ` James Y Knight
  2018-04-04 19:32               ` Josh Poimboeuf
  1 sibling, 1 reply; 52+ messages in thread
From: Greg KH @ 2018-04-04 16:59 UTC (permalink / raw)
  To: Nick Desaulniers
  Cc: Matthias Kaehlcke, Ingo Molnar, Peter Zijlstra, Linus Torvalds,
	LKML, Thomas Gleixner, Andrew Morton, James Y Knight,
	Chandler Carruth, Stephen Hines, Kees Cook, groeck,
	Greg Hackmann

On Wed, Apr 04, 2018 at 04:53:52PM +0000, Nick Desaulniers wrote:
> (re-sending as plain text)
> 
> On Wed, Apr 4, 2018 at 2:38 AM Greg KH <gregkh@linuxfoundation.org> wrote:
> > There are known-bugs with building a kernel with clang right now (I
> > pointed one out a few days ago about NULL checks being deleted from the
> > clang output for no good reason, which really is scary for obvious
> > reasons).
> 
> Is this the thread you are referring to?
> https://lkml.org/lkml/2018/3/27/1286
> 
> It's definitely something curious that I'll need to sit down and
> investigate more.  If there are other known instances, it would be good to
> let me know.

Here is another horrible work around that was needed to get clang to
stop generating invalid code, beaec533fc27 ("llist: clang: introduce
member_address_is_nonnull()")  That one caused a lot of odd failures by
users, I wonder what else is lurking in that same code pattern.  It's a
hard one to debug...

thanks,

greg k-h

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [GIT PULL] x86/build changes for v4.17
  2018-04-04 16:49             ` Nick Desaulniers
@ 2018-04-04 17:13               ` Linus Torvalds
  2018-04-04 17:46                 ` Nick Desaulniers
  2018-04-04 23:10                 ` Nick Desaulniers
  0 siblings, 2 replies; 52+ messages in thread
From: Linus Torvalds @ 2018-04-04 17:13 UTC (permalink / raw)
  To: Nick Desaulniers
  Cc: Greg KH, Matthias Kaehlcke, Ingo Molnar, Peter Zijlstra, LKML,
	Thomas Gleixner, Andrew Morton, James Y Knight, Chandler Carruth,
	Stephen Hines, Kees Cook, groeck, Greg Hackmann

On Wed, Apr 4, 2018 at 9:49 AM, Nick Desaulniers
<ndesaulniers@google.com> wrote:
>
> It's definitely something curious that I'll need to sit down and investigate
> more.  If there are other known instances, it would be good to let me know.

Note that we explicitly use "-fno-delete-null-pointer-checks" because
we do *not* want NULL pointer check removal even in the case of a bug.

See commit a3ca86aea507 ("Add '-fno-delete-null-pointer-checks' to gcc
CFLAGS") for the reason: we had buggy code that accessed a pointer
before the NULL pointer check, but the bug was "benign" as long as the
compiler didn't actually remove the check.

And note how the buggy code *looked* safe. It was doing the right
check, it was just that the check was hidden and disabled by another
bug.

Removing the NULL pointer check turned a benign bug into a trivially
exploitable one by just mapping user space data at NULL (which avoided
the kernel oops, and then made the kernel use the user value!).

Now, admittedly we have a ton of other security features in place to
avoid these kinds of issues - SMAP helps on the hardware side, and not
allowing users to mmap() NULL in the first place helps with most
distributions, but the point remains: the kernel generally really
doesn't want optimizations that are perhaps allowed by the standard,
but that result in code generation that doesn't match the source code.

The NULL pointer removal is one such thing: don't remove checks in the
kernel based on "standard says". It's ok to do optimizations that are
based on "hardware does the exact same thing", but not on "the
standard says this is undefined so we can remove it".

Other examples of where the kernel doesn't want the compiler to do
"the standard says this is undefined so I can take shortcuts" include:

 -fno-strict-aliasing: the standard is just wrong and full of shit,
and the misguided type-based aliasing can cause serious problems for
the kernel (but also other code)

 -fno-strict-overflow: again, this is a stupid optimization that
purely depends on the compiler generating faster code by generating
incorrect code.

The one I'm actually upset about is when a compiler goes even
*further* and does things that are NOT EVEN allowed by the paper
standard, much less by real code.

The fact that clang by default enables "-fmerge-all-constants"
behavior is just inexcusable. That's not just "let's do invalid
optimizations based on undefined behavior". That's an actual "let's do
known invalid optimizations that are explicitly disallowed even by the
standard".

             Linus

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [GIT PULL] x86/build changes for v4.17
  2018-04-04 17:13               ` Linus Torvalds
@ 2018-04-04 17:46                 ` Nick Desaulniers
  2018-04-04 23:10                 ` Nick Desaulniers
  1 sibling, 0 replies; 52+ messages in thread
From: Nick Desaulniers @ 2018-04-04 17:46 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: Greg KH, Matthias Kaehlcke, Ingo Molnar, Peter Zijlstra, LKML,
	Thomas Gleixner, Andrew Morton, James Y Knight, Chandler Carruth,
	Stephen Hines, Kees Cook, groeck, Greg Hackmann

[-- Attachment #1: Type: text/plain, Size: 929 bytes --]

On Wed, Apr 4, 2018 at 10:13 AM Linus Torvalds <
torvalds@linux-foundation.org> wrote:

> On Wed, Apr 4, 2018 at 9:49 AM, Nick Desaulniers
> <ndesaulniers@google.com> wrote:
> >
> > It's definitely something curious that I'll need to sit down and
> investigate
> > more.  If there are other known instances, it would be good to let me
> know.
>

> The one I'm actually upset about is when a compiler goes even
> *further* and does things that are NOT EVEN allowed by the paper
> standard, much less by real code.
>
> The fact that clang by default enables "-fmerge-all-constants"
> behavior is just inexcusable. That's not just "let's do invalid
> optimizations based on undefined behavior". That's an actual "let's do
> known invalid optimizations that are explicitly disallowed even by the
> standard".
>

The LLVM developers are in agreement here:
https://bugs.llvm.org/show_bug.cgi?id=18538#c16
-- 
Thanks,
~Nick Desaulniers

[-- Attachment #2: Type: text/html, Size: 1677 bytes --]

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [GIT PULL] x86/build changes for v4.17
  2018-04-04  9:30         ` Peter Zijlstra
@ 2018-04-04 19:17           ` Matthias Kaehlcke
  2018-04-04 20:33             ` Arnd Bergmann
  2018-04-04 23:04             ` Nick Desaulniers
  0 siblings, 2 replies; 52+ messages in thread
From: Matthias Kaehlcke @ 2018-04-04 19:17 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Ingo Molnar, Linus Torvalds, Linux Kernel Mailing List,
	Thomas Gleixner, Andrew Morton, James Y Knight, Chandler Carruth,
	Stephen Hines, Nick Desaulniers, Kees Cook, Guenter Roeck,
	Greg Hackmann, Greg Kroah-Hartman

El Wed, Apr 04, 2018 at 11:30:07AM +0200 Peter Zijlstra ha dit:

> On Tue, Apr 03, 2018 at 11:06:58AM -0700, Matthias Kaehlcke wrote:
> 
> > Yes, Chrome OS R67 (currently dev, soon beta) will ship a kernel built
> > with Clang for multiple x86 Chromebooks.
> 
> But there are still _known_ miscompilations....

Our compiler team is looking into this (missing option
-fno-delete-null-pointer-checks)

So far we didn't encounter any actual issues clearly linked to this,
neither during internal testing nor from devices in the field (some
arm64 devices use a kernel built with clang since R63, some x86
devices shipped with a clang built kernel with R63 and R64). Obviously
there might be latent issues and we are working on fixing the
underlying problem.

> > Given that it takes time for distributions to roll out new compiler
> > versions I would like to ask for a longer period of 'exemption' from
> > asm-goto for Clang, at least if it isn't an actual burden for the
> > kernel, like preventing important features from being added. An ideal
> > time would be after the next-next LTS version, if this is considered
> > too far out, after the next LTS version would be the second best time
> > IMO. Let me be clear, this is *not* to delay the implementation of
> > asm-goto, but to facilitate the use of Clang-built kernels by other
> > projects and distributions, as well as automated builds of upstream
> > kernels with Clang, without requiring necessarily the very latest
> > version of Clang or extra patches.
> 
> I don't think that's sane or realistic, given that the very latest clang
> is _known_ to miscompile the kernel. How can you want to support older
> compilers that are therefore also known to not work correctly.
> 
> Next LTS is still a fair way out, if we take LTS release to be
> every ~5 releases, the next one would be ~.19, that's still 3 releases
> hence. That's a _long_ time.
> 
> I don't see the point in waiting that long for a compiler that doesn't
> work even without asm-goto.

Even with clang having known issues it would be preferable not to
break kernel builds with clang, if this doesn't place a signifcant
burden on the kernel. I'm not sure it is strictly necessary to 'wait'
for clang to enforce asm-goto for gcc (and thus the vast majority of
builds), which is the primary goal of your patch. Couldn't we just
exclude clang for now from raising the error when lack of asm-goto
support is detected?

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [GIT PULL] x86/build changes for v4.17
  2018-04-04 16:59               ` Greg KH
@ 2018-04-04 19:26                 ` James Y Knight
  2018-04-04 19:42                   ` Linus Torvalds
  0 siblings, 1 reply; 52+ messages in thread
From: James Y Knight @ 2018-04-04 19:26 UTC (permalink / raw)
  To: gregkh
  Cc: Nick Desaulniers, mka, Ingo Molnar, Peter Zijlstra,
	Linus Torvalds, Linux Kernel Mailing List, tglx, Andrew Morton,
	Chandler Carruth, Stephen Hines, Kees Cook, groeck,
	Greg Hackmann

On Wed, Apr 4, 2018 at 12:59 PM Greg KH <gregkh@linuxfoundation.org> wrote:
> Here is another horrible work around that was needed to get clang to
> stop generating invalid code, beaec533fc27 ("llist: clang: introduce
> member_address_is_nonnull()")  That one caused a lot of odd failures by
> users, I wonder what else is lurking in that same code pattern.  It's a
> hard one to debug...

I would note that this issue is an entirely different issue from the
null-pointer-deref-is-undefined-behavior optimizations, even though it may
seem superficially similar. For _this_ issue, the behavior at question is
that the compiler assumes that objects are contiguous in memory, and thus
that "&struct_pointer->member_at_nonzero_offset != NULL" is always true. I
don't really see clang ever getting a flag to stop assuming that objects
are contiguous.

Note that clang does actually emit an on-by-default warning for a situation
analogous to this:
   warning: comparison of address of 'p->sub' not equal to a null pointer is
always true [-Wtautological-pointer-compare]
   if (&p->sub != NULL) {
        ~~~^~~    ~~~~
...but unfortunately that warning is suppressed when it occurs in a
macro-expansion, so the llist_for_each_entry error was silent.

(OTOH, I _do_ expect clang to eventually gain support for a flag to treat
NULL-pointer deref as a legal operation instead of UB. I'm not really sure
it makes sense for the linux kernel, but it's a useful feature for the
compiler to provide in any case, so why not...)

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [GIT PULL] x86/build changes for v4.17
  2018-04-04 16:53             ` Nick Desaulniers
  2018-04-04 16:59               ` Greg KH
@ 2018-04-04 19:32               ` Josh Poimboeuf
  2018-06-07 19:23                 ` Nick Desaulniers
  1 sibling, 1 reply; 52+ messages in thread
From: Josh Poimboeuf @ 2018-04-04 19:32 UTC (permalink / raw)
  To: Nick Desaulniers
  Cc: Greg KH, Matthias Kaehlcke, Ingo Molnar, Peter Zijlstra,
	Linus Torvalds, LKML, Thomas Gleixner, Andrew Morton,
	James Y Knight, Chandler Carruth, Stephen Hines, Kees Cook,
	groeck, Greg Hackmann

On Wed, Apr 04, 2018 at 04:53:52PM +0000, Nick Desaulniers wrote:
> (re-sending as plain text)
> 
> On Wed, Apr 4, 2018 at 2:38 AM Greg KH <gregkh@linuxfoundation.org> wrote:
> > There are known-bugs with building a kernel with clang right now (I
> > pointed one out a few days ago about NULL checks being deleted from the
> > clang output for no good reason, which really is scary for obvious
> > reasons).
> 
> Is this the thread you are referring to?
> https://lkml.org/lkml/2018/3/27/1286
> 
> It's definitely something curious that I'll need to sit down and
> investigate more.  If there are other known instances, it would be good to
> let me know.

As Matthias mentioned elsewhere, it sounds like they're planning to
implement -fno-delete-null-pointer-checks, which would presumably fix
the above issue.

Aside from the above-mentioned debugfs NULL checks, there was also
another (seemingly valid) objtool warning:

  arch/x86/mm/pti.o: warning: objtool: pti_init() falls through to next function pti_user_pagetable_walk_pmd()

I don't know if that one was also caused by removed NULL checks, but
it's worth investigating.

More details (and the .o file) here:

  https://lkml.kernel.org/r/20180319232255.GF37438@google.com

-- 
Josh

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [GIT PULL] x86/build changes for v4.17
  2018-04-04 19:26                 ` James Y Knight
@ 2018-04-04 19:42                   ` Linus Torvalds
  2018-04-04 22:21                     ` James Y Knight
  0 siblings, 1 reply; 52+ messages in thread
From: Linus Torvalds @ 2018-04-04 19:42 UTC (permalink / raw)
  To: James Y Knight
  Cc: Greg Kroah-Hartman, Nick Desaulniers, Matthias Kaehlcke,
	Ingo Molnar, Peter Zijlstra, Linux Kernel Mailing List,
	Thomas Gleixner, Andrew Morton, Chandler Carruth, Stephen Hines,
	Kees Cook, groeck, Greg Hackmann

On Wed, Apr 4, 2018 at 12:26 PM, James Y Knight <jyknight@google.com> wrote:
>
> (OTOH, I _do_ expect clang to eventually gain support for a flag to treat
> NULL-pointer deref as a legal operation instead of UB. I'm not really sure
> it makes sense for the linux kernel, but it's a useful feature for the
> compiler to provide in any case, so why not...)

I would actually argue that this is more closely related to
"-fno-strict-overflow" than to "-fno-delete-null-pointer-checks'.

It just happens to be about pointer arithmetic, rather than about
signed integers. We *will* do arithmetic with NULL pointers that can
wrap.

Yes, the kernel does odd things with pointers in general. I won't
apologize for it, because we have really good reasons for doing so.

The whole "we take offsets of pointers even *backwards*" is because we
extensively rely on embedding structures inside each other. If clang
actually did proper optimization, it would have noticed that the
"offset backwards" was followed by an "offset forwards" and then a
NULL pointer check, and that there actually was no actual real
wrapping or non-contiguous behavior in reality.

But clang didn't do that, and instead blindly said "you're going
forwards and the result can't be NULL", without ever looking at "oh,
they went backwards first".

So honestly, part of the problem we had with clang was that it was too
*stupid* to see that what we did wasn't actually invalid even by
clang's own standards!

I'm not saying that the kernel use is really standards compliant,
because there definitely are those temporary pointer values (that are
never used!) that point outside an object.

But honestly, the clang "optimization" is really quite debatable, and
we'd want to turn it off - or just have clang be smarter enough that
it sees that "oh, it all stays within the object after all".

We do other things to pointers that the standard may not cover. Deal
with it. Any malloc() library will similarly just depend on pointer
arithmetic really working. We may admittedly take it to some
ridiculous degrees, with the whole "ok, we assign negative values to
pointers as error cases" in addition to the "we use container_of() to
look uip pointers *backwards*" thing.

So we'd definitely want that "-fno-strict-overflow" to affect pointer
arithmetic too (or have a separate flag for the pointer equivalent of
"we play games that may temporarily wrap pointer values around"..

                 Linus

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [GIT PULL] x86/build changes for v4.17
  2018-04-04 19:17           ` Matthias Kaehlcke
@ 2018-04-04 20:33             ` Arnd Bergmann
  2018-04-04 20:58               ` Matthias Kaehlcke
  2018-04-04 23:04             ` Nick Desaulniers
  1 sibling, 1 reply; 52+ messages in thread
From: Arnd Bergmann @ 2018-04-04 20:33 UTC (permalink / raw)
  To: Matthias Kaehlcke
  Cc: Peter Zijlstra, Ingo Molnar, Linus Torvalds,
	Linux Kernel Mailing List, Thomas Gleixner, Andrew Morton,
	James Y Knight, Chandler Carruth, Stephen Hines,
	Nick Desaulniers, Kees Cook, Guenter Roeck, Greg Hackmann,
	Greg Kroah-Hartman

On Wed, Apr 4, 2018 at 9:17 PM, Matthias Kaehlcke <mka@chromium.org> wrote:
> El Wed, Apr 04, 2018 at 11:30:07AM +0200 Peter Zijlstra ha dit:
>
>> On Tue, Apr 03, 2018 at 11:06:58AM -0700, Matthias Kaehlcke wrote:
>>
>> > Yes, Chrome OS R67 (currently dev, soon beta) will ship a kernel built
>> > with Clang for multiple x86 Chromebooks.
>>
>> But there are still _known_ miscompilations....
>
> Our compiler team is looking into this (missing option
> -fno-delete-null-pointer-checks)

Do you know if anyone is looking into __builtin_constant_p()
optimization as well? We have a lot of uses of this gcc feature
in the kernel, and if I remember correctly, clang implements
this by basically always returning false for the cases we
are interested in.

In most cases, this is used to implement a fast-path for a helper
function, so not doing it the same way as gcc just results in
slower execution, but I assume we also have code that behaves
differently on clang compared to gcc because of this.

        Arnd

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [GIT PULL] x86/build changes for v4.17
  2018-04-04 20:33             ` Arnd Bergmann
@ 2018-04-04 20:58               ` Matthias Kaehlcke
  2018-04-04 21:11                 ` Arnd Bergmann
  0 siblings, 1 reply; 52+ messages in thread
From: Matthias Kaehlcke @ 2018-04-04 20:58 UTC (permalink / raw)
  To: Arnd Bergmann
  Cc: Peter Zijlstra, Ingo Molnar, Linus Torvalds,
	Linux Kernel Mailing List, Thomas Gleixner, Andrew Morton,
	James Y Knight, Chandler Carruth, Stephen Hines,
	Nick Desaulniers, Kees Cook, Guenter Roeck, Greg Hackmann,
	Greg Kroah-Hartman

El Wed, Apr 04, 2018 at 10:33:19PM +0200 Arnd Bergmann ha dit:

> On Wed, Apr 4, 2018 at 9:17 PM, Matthias Kaehlcke <mka@chromium.org> wrote:
> > El Wed, Apr 04, 2018 at 11:30:07AM +0200 Peter Zijlstra ha dit:
> >
> >> On Tue, Apr 03, 2018 at 11:06:58AM -0700, Matthias Kaehlcke wrote:
> >>
> >> > Yes, Chrome OS R67 (currently dev, soon beta) will ship a kernel built
> >> > with Clang for multiple x86 Chromebooks.
> >>
> >> But there are still _known_ miscompilations....
> >
> > Our compiler team is looking into this (missing option
> > -fno-delete-null-pointer-checks)
> 
> Do you know if anyone is looking into __builtin_constant_p()
> optimization as well? We have a lot of uses of this gcc feature
> in the kernel, and if I remember correctly, clang implements
> this by basically always returning false for the cases we
> are interested in.
> 
> In most cases, this is used to implement a fast-path for a helper
> function, so not doing it the same way as gcc just results in
> slower execution, but I assume we also have code that behaves
> differently on clang compared to gcc because of this.

I think I didn't come (knowingly) across that one yet. Could you point
me to an instance that could be used as an example in a bug report?

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [GIT PULL] x86/build changes for v4.17
  2018-04-04 20:58               ` Matthias Kaehlcke
@ 2018-04-04 21:11                 ` Arnd Bergmann
  2018-04-04 21:46                   ` Matthias Kaehlcke
  0 siblings, 1 reply; 52+ messages in thread
From: Arnd Bergmann @ 2018-04-04 21:11 UTC (permalink / raw)
  To: Matthias Kaehlcke
  Cc: Peter Zijlstra, Ingo Molnar, Linus Torvalds,
	Linux Kernel Mailing List, Thomas Gleixner, Andrew Morton,
	James Y Knight, Chandler Carruth, Stephen Hines,
	Nick Desaulniers, Kees Cook, Guenter Roeck, Greg Hackmann,
	Greg Kroah-Hartman

On Wed, Apr 4, 2018 at 10:58 PM, Matthias Kaehlcke <mka@chromium.org> wrote:
> El Wed, Apr 04, 2018 at 10:33:19PM +0200 Arnd Bergmann ha dit:
>>
>> In most cases, this is used to implement a fast-path for a helper
>> function, so not doing it the same way as gcc just results in
>> slower execution, but I assume we also have code that behaves
>> differently on clang compared to gcc because of this.
>
> I think I didn't come (knowingly) across that one yet. Could you point
> me to an instance that could be used as an example in a bug report?

This code

#include <linux/math64.h>
int f(u64 u)
{
        return div_u64(u, 100000);
}

results in a call to __do_div64() on 32-bit arm using clang, but
gets optimized into a set of multiply+shift on gcc.

The same thing should happen on x86, but haven't tried it
because of the 'asm goto' build failure in linux-next with clang.

      Arnd

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [GIT PULL] x86/build changes for v4.17
  2018-04-04 21:11                 ` Arnd Bergmann
@ 2018-04-04 21:46                   ` Matthias Kaehlcke
  2018-04-04 21:59                     ` Linus Torvalds
  0 siblings, 1 reply; 52+ messages in thread
From: Matthias Kaehlcke @ 2018-04-04 21:46 UTC (permalink / raw)
  To: Arnd Bergmann
  Cc: Peter Zijlstra, Ingo Molnar, Linus Torvalds,
	Linux Kernel Mailing List, Thomas Gleixner, Andrew Morton,
	James Y Knight, Chandler Carruth, Stephen Hines,
	Nick Desaulniers, Kees Cook, Guenter Roeck, Greg Hackmann,
	Greg Kroah-Hartman

El Wed, Apr 04, 2018 at 11:11:36PM +0200 Arnd Bergmann ha dit:

> On Wed, Apr 4, 2018 at 10:58 PM, Matthias Kaehlcke <mka@chromium.org> wrote:
> > El Wed, Apr 04, 2018 at 10:33:19PM +0200 Arnd Bergmann ha dit:
> >>
> >> In most cases, this is used to implement a fast-path for a helper
> >> function, so not doing it the same way as gcc just results in
> >> slower execution, but I assume we also have code that behaves
> >> differently on clang compared to gcc because of this.
> >
> > I think I didn't come (knowingly) across that one yet. Could you point
> > me to an instance that could be used as an example in a bug report?
> 
> This code
> 
> #include <linux/math64.h>
> int f(u64 u)
> {
>         return div_u64(u, 100000);
> }
> 
> results in a call to __do_div64() on 32-bit arm using clang, but
> gets optimized into a set of multiply+shift on gcc.

I understand this is annoying, but it seems I'm missing something:

static inline u64 div_u64(u64 dividend, u32 divisor)
{
        u32 remainder;
        return div_u64_rem(dividend, divisor, &remainder);
}

static inline u64 div_u64_rem(u64 dividend, u32 divisor, u32 *remainder)
{
        *remainder = do_div(dividend, divisor);
        return dividend;
}

#define do_div(n, base) __div64_32(&(n), base)

static inline uint32_t __div64_32(uint64_t *n, uint32_t base)
{
        register unsigned int __base      asm("r4") = base;
        register unsigned long long __n   asm("r0") = *n;
        register unsigned long long __res asm("r2");
        register unsigned int __rem       asm(__xh);
        asm(    __asmeq("%0", __xh)
                __asmeq("%1", "r2")
                __asmeq("%2", "r0")
                __asmeq("%3", "r4")
                "bl     __do_div64"
                : "=r" (__rem), "=r" (__res)
                : "r" (__n), "r" (__base)
                : "ip", "lr", "cc");
        *n = __res;
        return __rem;
}

There is no reference to __builtin_constant_p(), could you elaborate?

Also you mentioned there are plenty of cases, maybe there is a more
straightforward one?

In any case it seems this derails a bit from the original topic of the
thread. Shall we take this offline?

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [GIT PULL] x86/build changes for v4.17
  2018-04-04 21:46                   ` Matthias Kaehlcke
@ 2018-04-04 21:59                     ` Linus Torvalds
  2018-04-04 22:17                       ` Matthias Kaehlcke
  0 siblings, 1 reply; 52+ messages in thread
From: Linus Torvalds @ 2018-04-04 21:59 UTC (permalink / raw)
  To: Matthias Kaehlcke
  Cc: Arnd Bergmann, Peter Zijlstra, Ingo Molnar,
	Linux Kernel Mailing List, Thomas Gleixner, Andrew Morton,
	James Y Knight, Chandler Carruth, Stephen Hines,
	Nick Desaulniers, Kees Cook, Guenter Roeck, Greg Hackmann,
	Greg Kroah-Hartman

On Wed, Apr 4, 2018 at 2:46 PM, Matthias Kaehlcke <mka@chromium.org> wrote:
>
> I understand this is annoying, but it seems I'm missing something:

I think you're looking at !AEABI case.

The AEABI case is worse. It ends up getting the code from
include/asm-generic/div64.h after defining a few helper inline asm
functions.

It's probably best if you start taking some recreational drugs
_before_ looking at that code.  It might make you go "Ooh, kewl!"
instead of trying to dig out your eyeballs with a rusty spoon.

               Linus

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [GIT PULL] x86/build changes for v4.17
  2018-04-04 21:59                     ` Linus Torvalds
@ 2018-04-04 22:17                       ` Matthias Kaehlcke
  2018-04-04 22:39                         ` Linus Torvalds
  0 siblings, 1 reply; 52+ messages in thread
From: Matthias Kaehlcke @ 2018-04-04 22:17 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: Arnd Bergmann, Peter Zijlstra, Ingo Molnar,
	Linux Kernel Mailing List, Thomas Gleixner, Andrew Morton,
	James Y Knight, Chandler Carruth, Stephen Hines,
	Nick Desaulniers, Kees Cook, Guenter Roeck, Greg Hackmann,
	Greg Kroah-Hartman

El Wed, Apr 04, 2018 at 02:59:09PM -0700 Linus Torvalds ha dit:

> On Wed, Apr 4, 2018 at 2:46 PM, Matthias Kaehlcke <mka@chromium.org> wrote:
> >
> > I understand this is annoying, but it seems I'm missing something:
> 
> I think you're looking at !AEABI case.
> 
> The AEABI case is worse. It ends up getting the code from
> include/asm-generic/div64.h after defining a few helper inline asm
> functions.
> 
> It's probably best if you start taking some recreational drugs
> _before_ looking at that code.  It might make you go "Ooh, kewl!"
> instead of trying to dig out your eyeballs with a rusty spoon.

Thanks, though I really should have followed your advice ...

Getting our compiler team high to look into this might affect a timely
(and correct ...) implementation of asm-goto and others important
features. Arnd, do you have another, preferably simple instance to
keep our compiler folks (halfway) sane?

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [GIT PULL] x86/build changes for v4.17
  2018-04-04 19:42                   ` Linus Torvalds
@ 2018-04-04 22:21                     ` James Y Knight
  2018-04-04 22:29                       ` Linus Torvalds
  2018-04-05  7:08                       ` Peter Zijlstra
  0 siblings, 2 replies; 52+ messages in thread
From: James Y Knight @ 2018-04-04 22:21 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: gregkh, Nick Desaulniers, mka, Ingo Molnar, Peter Zijlstra,
	Linux Kernel Mailing List, tglx, Andrew Morton, Chandler Carruth,
	Stephen Hines, Kees Cook, groeck, Greg Hackmann

On Wed, Apr 4, 2018 at 3:42 PM Linus Torvalds
<torvalds@linux-foundation.org>
wrote:
> So we'd definitely want that "-fno-strict-overflow" to affect pointer
> arithmetic too (or have a separate flag for the pointer equivalent of
> "we play games that may temporarily wrap pointer values around"..

The -fno-strict-overflow flag in clang does do that. You can subtract any
two random pointers you have, whether they're in the same object or not,
even if the math overflows. And you can later add things back together, and
end up back where you started, and load the value. No problem, even though
subtracting pointers from unrelated objects is illegal according to C.

That's why container_of as it's written in the kernel does work in clang --
wrapping arithmetic when given a NULL pointer and everything. (as a
sidenote...it would be a readability improvement to make container_of do
its math with a uintptr_t instead of a void*, since it would be more
*obviously* correct...)

But allowing random pointer arithmetic, and pointer arithmetic wraparound,
is still different than asserting that an object _field access_ can
overflow. Clang does not believe that can happen -- it assumes that an
object will still be contiguous. And that's why the llist stuff used to be
broken, before it was corrected to do simply do math on a uintptr_t (which
is a nice and simple and sane fix!).

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [GIT PULL] x86/build changes for v4.17
  2018-04-04 22:21                     ` James Y Knight
@ 2018-04-04 22:29                       ` Linus Torvalds
  2018-04-05  7:08                       ` Peter Zijlstra
  1 sibling, 0 replies; 52+ messages in thread
From: Linus Torvalds @ 2018-04-04 22:29 UTC (permalink / raw)
  To: James Y Knight
  Cc: Greg Kroah-Hartman, Nick Desaulniers, Matthias Kaehlcke,
	Ingo Molnar, Peter Zijlstra, Linux Kernel Mailing List,
	Thomas Gleixner, Andrew Morton, Chandler Carruth, Stephen Hines,
	Kees Cook, Guenter Roeck, Greg Hackmann

On Wed, Apr 4, 2018 at 3:21 PM, James Y Knight <jyknight@google.com> wrote:
>
> But allowing random pointer arithmetic, and pointer arithmetic wraparound,
> is still different than asserting that an object _field access_ can
> overflow.

But that's not what the code does.

It never _accessed_ the field. It only looked at the *address* of the field.

So clang got this case wrong:

        &(pos)->member != NULL

where that "&" thing is very much important. There was no access. An
access would in fact have been a bug (and was the bug that the
compiler caused, because it removed the check for NULL).

You may consider this an "access", but to me, it's all just pointer
arithmetic, and not in the least different from the kind of pointer
arithmetic that "offsetof()" traditionally does.

So I think your "it's a field access" is just a syntactic argument and
should not semantically be *any* different from doing arithmetic on a
pointer.

            Linus

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [GIT PULL] x86/build changes for v4.17
  2018-04-04 22:17                       ` Matthias Kaehlcke
@ 2018-04-04 22:39                         ` Linus Torvalds
  2018-04-04 23:31                           ` Matthias Kaehlcke
  0 siblings, 1 reply; 52+ messages in thread
From: Linus Torvalds @ 2018-04-04 22:39 UTC (permalink / raw)
  To: Matthias Kaehlcke
  Cc: Arnd Bergmann, Peter Zijlstra, Ingo Molnar,
	Linux Kernel Mailing List, Thomas Gleixner, Andrew Morton,
	James Y Knight, Chandler Carruth, Stephen Hines,
	Nick Desaulniers, Kees Cook, Guenter Roeck, Greg Hackmann,
	Greg Kroah-Hartman

On Wed, Apr 4, 2018 at 3:17 PM, Matthias Kaehlcke <mka@chromium.org> wrote:
>
> Getting our compiler team high to look into this might affect a timely
> (and correct ...) implementation of asm-goto and others important
> features. Arnd, do you have another, preferably simple instance to
> keep our compiler folks (halfway) sane?

I don't know if clang actually already gets this right or not, but as
an example of a case where we have a semantic difference between "is
this a constant or not", a much simpler case is in

 - arch/x86/include/asm/uaccess.h:

  /*
   * Test whether a block of memory is a valid user space address.
   * Returns 0 if the range is valid, nonzero otherwise.
   */
  static inline bool __chk_range_not_ok(unsigned long addr, unsigned
long size, unsigned long limit)
  {
        /*
         * If we have used "sizeof()" for the size,
         * we know it won't overflow the limit (but
         * it might overflow the 'addr', so it's
         * important to subtract the size from the
         * limit, not add it to the address).
         */
        if (__builtin_constant_p(size))
                return unlikely(addr > limit - size);

        /* Arbitrary sizes? Be careful about overflow */
        addr += size;
        if (unlikely(addr < size))
                return true;
        return unlikely(addr > limit);
  }

where the actual check itself is simplified for the constant size case
(because we know that constant sizes are never going to have the
overflow issue with the address size limit)

That inline function itself is then wrapped with a couple of macros,
and the usual use-case is through "access_ok()", which then typically
just gets either a sizeof(), or a non-constant length.

Of course, we've been walking away from having people do "access_ok()
+ __copy_from_user()" (the latter does some conceptually similar
optimizations on constant sizes), so those probably simply don't
matter much any more in practice.

But they are certainly a lot simpler to look at than the more exciting
32-bit asm-generic "do_div()" case is ;)

                  Linus

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [GIT PULL] x86/build changes for v4.17
  2018-04-04 19:17           ` Matthias Kaehlcke
  2018-04-04 20:33             ` Arnd Bergmann
@ 2018-04-04 23:04             ` Nick Desaulniers
  1 sibling, 0 replies; 52+ messages in thread
From: Nick Desaulniers @ 2018-04-04 23:04 UTC (permalink / raw)
  To: Matthias Kaehlcke
  Cc: Peter Zijlstra, Ingo Molnar, Linus Torvalds, LKML,
	Thomas Gleixner, Andrew Morton, James Y Knight, Chandler Carruth,
	Stephen Hines, Kees Cook, groeck, Greg Hackmann, Greg KH

On Wed, Apr 4, 2018 at 12:17 PM Matthias Kaehlcke <mka@chromium.org> wrote:
> Even with clang having known issues it would be preferable not to
> break kernel builds with clang, if this doesn't place a signifcant
> burden on the kernel. I'm not sure it is strictly necessary to 'wait'
> for clang to enforce asm-goto for gcc (and thus the vast majority of
> builds), which is the primary goal of your patch. Couldn't we just
> exclude clang for now from raising the error when lack of asm-goto
> support is detected?

In particular, I worry that this now stalls adding clang support to 0-day,
since now this will fail outright to compile.
--
Thanks,
~Nick Desaulniers

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [GIT PULL] x86/build changes for v4.17
  2018-04-04 17:13               ` Linus Torvalds
  2018-04-04 17:46                 ` Nick Desaulniers
@ 2018-04-04 23:10                 ` Nick Desaulniers
  1 sibling, 0 replies; 52+ messages in thread
From: Nick Desaulniers @ 2018-04-04 23:10 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: Greg KH, Matthias Kaehlcke, Ingo Molnar, Peter Zijlstra, LKML,
	Thomas Gleixner, Andrew Morton, James Y Knight, Chandler Carruth,
	Stephen Hines, Kees Cook, groeck, Greg Hackmann, Manoj Gupta

On Wed, Apr 4, 2018 at 10:13 AM Linus Torvalds <
torvalds@linux-foundation.org> wrote:
> The fact that clang by default enables "-fmerge-all-constants"
> behavior is just inexcusable. That's not just "let's do invalid
> optimizations based on undefined behavior". That's an actual "let's do
> known invalid optimizations that are explicitly disallowed even by the
> standard".

Manoj already has a patch for this that looks like it's passed code review:
https://reviews.llvm.org/D45289
--
Thanks,
~Nick Desaulniers

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [GIT PULL] x86/build changes for v4.17
  2018-04-04 22:39                         ` Linus Torvalds
@ 2018-04-04 23:31                           ` Matthias Kaehlcke
  2018-04-05  0:05                             ` Linus Torvalds
  2018-04-05  7:20                             ` Peter Zijlstra
  0 siblings, 2 replies; 52+ messages in thread
From: Matthias Kaehlcke @ 2018-04-04 23:31 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: Arnd Bergmann, Peter Zijlstra, Ingo Molnar,
	Linux Kernel Mailing List, Thomas Gleixner, Andrew Morton,
	James Y Knight, Chandler Carruth, Stephen Hines,
	Nick Desaulniers, Kees Cook, Guenter Roeck, Greg Hackmann,
	Greg Kroah-Hartman

El Wed, Apr 04, 2018 at 03:39:24PM -0700 Linus Torvalds ha dit:

> On Wed, Apr 4, 2018 at 3:17 PM, Matthias Kaehlcke <mka@chromium.org> wrote:
> >
> > Getting our compiler team high to look into this might affect a timely
> > (and correct ...) implementation of asm-goto and others important
> > features. Arnd, do you have another, preferably simple instance to
> > keep our compiler folks (halfway) sane?
> 
> I don't know if clang actually already gets this right or not, but as
> an example of a case where we have a semantic difference between "is
> this a constant or not", a much simpler case is in
> 
>  - arch/x86/include/asm/uaccess.h:
> 
>   /*
>    * Test whether a block of memory is a valid user space address.
>    * Returns 0 if the range is valid, nonzero otherwise.
>    */
>   static inline bool __chk_range_not_ok(unsigned long addr, unsigned
> long size, unsigned long limit)
>   {
>         /*
>          * If we have used "sizeof()" for the size,
>          * we know it won't overflow the limit (but
>          * it might overflow the 'addr', so it's
>          * important to subtract the size from the
>          * limit, not add it to the address).
>          */
>         if (__builtin_constant_p(size))
>                 return unlikely(addr > limit - size);
> 
>         /* Arbitrary sizes? Be careful about overflow */
>         addr += size;
>         if (unlikely(addr < size))
>                 return true;
>         return unlikely(addr > limit);
>   }
> 
> where the actual check itself is simplified for the constant size case
> (because we know that constant sizes are never going to have the
> overflow issue with the address size limit)
> 
> That inline function itself is then wrapped with a couple of macros,
> and the usual use-case is through "access_ok()", which then typically
> just gets either a sizeof(), or a non-constant length.
> 
> Of course, we've been walking away from having people do "access_ok()
> + __copy_from_user()" (the latter does some conceptually similar
> optimizations on constant sizes), so those probably simply don't
> matter much any more in practice.
> 
> But they are certainly a lot simpler to look at than the more exciting
> 32-bit asm-generic "do_div()" case is ;)

Thanks, that was useful!

>From some experiments it looks like clang, in difference to gcc, does
not treat constant values passed as parameters to inline function as
constants.

I'll ask our compiler folks to take a look, with lower priority than
other issues in this thread though.

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [GIT PULL] x86/build changes for v4.17
  2018-04-04 23:31                           ` Matthias Kaehlcke
@ 2018-04-05  0:05                             ` Linus Torvalds
  2018-04-05  0:20                               ` Kees Cook
  2018-04-05  7:24                               ` Peter Zijlstra
  2018-04-05  7:20                             ` Peter Zijlstra
  1 sibling, 2 replies; 52+ messages in thread
From: Linus Torvalds @ 2018-04-05  0:05 UTC (permalink / raw)
  To: Matthias Kaehlcke
  Cc: Arnd Bergmann, Peter Zijlstra, Ingo Molnar,
	Linux Kernel Mailing List, Thomas Gleixner, Andrew Morton,
	James Y Knight, Chandler Carruth, Stephen Hines,
	Nick Desaulniers, Kees Cook, Guenter Roeck, Greg Hackmann,
	Greg Kroah-Hartman

On Wed, Apr 4, 2018 at 4:31 PM, Matthias Kaehlcke <mka@chromium.org> wrote:
>
> From some experiments it looks like clang, in difference to gcc, does
> not treat constant values passed as parameters to inline function as
> constants.

Yeah, I think gcc used to have those semantics a long time ago too.

Many of our __builtin_constant_p() uses are indeed just in macros, but
certainly not all.

Other examples are found in our "fortified" string functions.

There a clang build will likely simply miss some of the build-time
fortification checks, and trigger them at runtime instead.

Of course, we hopefully don't *have* any build-time failures, because
gcc will have caught them, so you won't care as long as clang is a
secondary compiler, but long-term they'd be good.

> I'll ask our compiler folks to take a look, with lower priority than
> other issues in this thread though.

Ack. "asm goto" is way more important. The __builtin_constant_p()
stuff tends to be for various peephole optimizations.

Another example of that can be found in our x86 bit operations macros:

  static __always_inline void
  set_bit(long nr, volatile unsigned long *addr)
  {
        if (IS_IMMEDIATE(nr)) {
                asm volatile(LOCK_PREFIX "orb %1,%0"
                        : CONST_MASK_ADDR(nr, addr)
                        : "iq" ((u8)CONST_MASK(nr))
                        : "memory");
        } else {
                asm volatile(LOCK_PREFIX __ASM_SIZE(bts) " %1,%0"
                        : BITOP_ADDR(addr) : "Ir" (nr) : "memory");
        }
  }

where that IS_IMMEDIATE() hides a __builtin_constant_p(). But we could
actually change that - for some reason the test_bit() case looks like
this:

  #define test_bit(nr, addr)                      \
        (__builtin_constant_p((nr))             \
         ? constant_test_bit((nr), (addr))      \
         : variable_test_bit((nr), (addr)))

which is much more straightforward anyway. I'm not quite sure why we
did it that odd way anyway, but I bet it's just "hysterical raisins"
along with the test_bit() not needing inline asm at all for the
constant case.

             Linus

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [GIT PULL] x86/build changes for v4.17
  2018-04-05  0:05                             ` Linus Torvalds
@ 2018-04-05  0:20                               ` Kees Cook
  2018-04-05  7:24                               ` Peter Zijlstra
  1 sibling, 0 replies; 52+ messages in thread
From: Kees Cook @ 2018-04-05  0:20 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: Matthias Kaehlcke, Arnd Bergmann, Peter Zijlstra, Ingo Molnar,
	Linux Kernel Mailing List, Thomas Gleixner, Andrew Morton,
	James Y Knight, Chandler Carruth, Stephen Hines,
	Nick Desaulniers, Guenter Roeck, Greg Hackmann,
	Greg Kroah-Hartman

On Wed, Apr 4, 2018 at 5:05 PM, Linus Torvalds
<torvalds@linux-foundation.org> wrote:
> On Wed, Apr 4, 2018 at 4:31 PM, Matthias Kaehlcke <mka@chromium.org> wrote:
>>
>> From some experiments it looks like clang, in difference to gcc, does
>> not treat constant values passed as parameters to inline function as
>> constants.
>
> Yeah, I think gcc used to have those semantics a long time ago too.
>
> Many of our __builtin_constant_p() uses are indeed just in macros, but
> certainly not all.
>
> Other examples are found in our "fortified" string functions.
>
> There a clang build will likely simply miss some of the build-time
> fortification checks, and trigger them at runtime instead.
>
> Of course, we hopefully don't *have* any build-time failures, because
> gcc will have caught them, so you won't care as long as clang is a
> secondary compiler, but long-term they'd be good.

Yeah, it's used in inline functions in a lot of places. Some quickly
jump out: kmalloc, crypto, bitmaps, networking, uaccess, kvm, etc from
doing a dumb grep as:

git grep -B5 __builtin_constant_p | grep -A5 inline

FWIW, I prefer inline functions over macros just to keep type checking
a some level of sanity when reading build warnings/errors. ;)

-Kees

-- 
Kees Cook
Pixel Security

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [GIT PULL] x86/build changes for v4.17
  2018-04-04 22:21                     ` James Y Knight
  2018-04-04 22:29                       ` Linus Torvalds
@ 2018-04-05  7:08                       ` Peter Zijlstra
  2018-04-05 16:21                         ` James Y Knight
  1 sibling, 1 reply; 52+ messages in thread
From: Peter Zijlstra @ 2018-04-05  7:08 UTC (permalink / raw)
  To: James Y Knight
  Cc: Linus Torvalds, gregkh, Nick Desaulniers, mka, Ingo Molnar,
	Linux Kernel Mailing List, tglx, Andrew Morton, Chandler Carruth,
	Stephen Hines, Kees Cook, groeck, Greg Hackmann

On Wed, Apr 04, 2018 at 10:21:05PM +0000, James Y Knight wrote:
> But allowing random pointer arithmetic, and pointer arithmetic wraparound,
> is still different than asserting that an object _field access_ can
> overflow. Clang does not believe that can happen -- it assumes that an
> object will still be contiguous. And that's why the llist stuff used to be
> broken, before it was corrected to do simply do math on a uintptr_t (which
> is a nice and simple and sane fix!).

That 'fix' wasn't anything simple, I recently ran into that
member_address_is_nonnull() trainwreck and had to think real hard wtf it
was about.

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [GIT PULL] x86/build changes for v4.17
  2018-04-04 23:31                           ` Matthias Kaehlcke
  2018-04-05  0:05                             ` Linus Torvalds
@ 2018-04-05  7:20                             ` Peter Zijlstra
  2018-04-05 17:46                               ` James Y Knight
  2018-04-05 17:47                               ` James Y Knight
  1 sibling, 2 replies; 52+ messages in thread
From: Peter Zijlstra @ 2018-04-05  7:20 UTC (permalink / raw)
  To: Matthias Kaehlcke
  Cc: Linus Torvalds, Arnd Bergmann, Ingo Molnar,
	Linux Kernel Mailing List, Thomas Gleixner, Andrew Morton,
	James Y Knight, Chandler Carruth, Stephen Hines,
	Nick Desaulniers, Kees Cook, Guenter Roeck, Greg Hackmann,
	Greg Kroah-Hartman

On Wed, Apr 04, 2018 at 04:31:11PM -0700, Matthias Kaehlcke wrote:

> From some experiments it looks like clang, in difference to gcc, does
> not treat constant values passed as parameters to inline function as
> constants.

Then you're also missing a heap of optimizations in code like
rb_erase_augmented() which is specifically constructed to take advantage
of constant propagation like that.

Other sites where we expect that to happen is __mutex_lock_common(),
__update_load_sum() and a bunch of others. There isn't strictly a bug
here, but not doing that constant propagation will still result in shit
code gen.

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [GIT PULL] x86/build changes for v4.17
  2018-04-05  0:05                             ` Linus Torvalds
  2018-04-05  0:20                               ` Kees Cook
@ 2018-04-05  7:24                               ` Peter Zijlstra
  2018-04-05  8:04                                 ` Ingo Molnar
  2018-04-05 16:43                                 ` Linus Torvalds
  1 sibling, 2 replies; 52+ messages in thread
From: Peter Zijlstra @ 2018-04-05  7:24 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: Matthias Kaehlcke, Arnd Bergmann, Ingo Molnar,
	Linux Kernel Mailing List, Thomas Gleixner, Andrew Morton,
	James Y Knight, Chandler Carruth, Stephen Hines,
	Nick Desaulniers, Kees Cook, Guenter Roeck, Greg Hackmann,
	Greg Kroah-Hartman

On Wed, Apr 04, 2018 at 05:05:25PM -0700, Linus Torvalds wrote:
> for some reason the test_bit() case looks like
> this:
> 
>   #define test_bit(nr, addr)                      \
>         (__builtin_constant_p((nr))             \
>          ? constant_test_bit((nr), (addr))      \
>          : variable_test_bit((nr), (addr)))
> 
> which is much more straightforward anyway. I'm not quite sure why we
> did it that odd way anyway, but I bet it's just "hysterical raisins"
> along with the test_bit() not needing inline asm at all for the
> constant case.

I always assumed BT was a more expensive instruction than AND with
immediate.

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [GIT PULL] x86/build changes for v4.17
  2018-04-05  7:24                               ` Peter Zijlstra
@ 2018-04-05  8:04                                 ` Ingo Molnar
  2018-04-05  8:24                                   ` Peter Zijlstra
  2018-04-05 16:43                                 ` Linus Torvalds
  1 sibling, 1 reply; 52+ messages in thread
From: Ingo Molnar @ 2018-04-05  8:04 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Linus Torvalds, Matthias Kaehlcke, Arnd Bergmann,
	Linux Kernel Mailing List, Thomas Gleixner, Andrew Morton,
	James Y Knight, Chandler Carruth, Stephen Hines,
	Nick Desaulniers, Kees Cook, Guenter Roeck, Greg Hackmann,
	Greg Kroah-Hartman


* Peter Zijlstra <peterz@infradead.org> wrote:

> On Wed, Apr 04, 2018 at 05:05:25PM -0700, Linus Torvalds wrote:
> > for some reason the test_bit() case looks like
> > this:
> > 
> >   #define test_bit(nr, addr)                      \
> >         (__builtin_constant_p((nr))             \
> >          ? constant_test_bit((nr), (addr))      \
> >          : variable_test_bit((nr), (addr)))
> > 
> > which is much more straightforward anyway. I'm not quite sure why we
> > did it that odd way anyway, but I bet it's just "hysterical raisins"
> > along with the test_bit() not needing inline asm at all for the
> > constant case.
> 
> I always assumed BT was a more expensive instruction than AND with
> immediate.

According to:

   http://www.agner.org/optimize/instruction_tables.pdf

The SkyLake costs for 'BT', 'AND' and 'TEST' variants are:

         Instruction        Operands      uops fused    uops unfused       uops port    latency throughput
                  BT           r,r/i               1               1             p06          1        0.5
                  BT             m,r              10              10                                     5
                  BT             m,i               2               2         p06 p23                   0.5
         BTR BTS BTC           r,r/i               1               1             p06          1        0.5
         BTR BTS BTC             m,r              10              11                                     5
         BTR BTS BTC             m,i               3               4      p06 p4 p23                     1
          AND OR XOR           r,r/i               1               1           p0156          1       0.25
          AND OR XOR             r,m               1               2       p0156 p23                   0.5
          AND OR XOR           m,r/i               2               4 2p0156 2p237 p4          5          1
                TEST           r,r/i               1               1           p0156          1       0.25
                TEST           m,r/i               1               2       p0156 p23          1        0.5


So if I'm reading it right, the relevant comparison would be:

                  BT             m,i               2               2         p06 p23                   0.5
          AND OR XOR           m,r/i               2               4 2p0156 2p237 p4          5          1

?

Thanks,

	Ingo

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [GIT PULL] x86/build changes for v4.17
  2018-04-05  8:04                                 ` Ingo Molnar
@ 2018-04-05  8:24                                   ` Peter Zijlstra
  0 siblings, 0 replies; 52+ messages in thread
From: Peter Zijlstra @ 2018-04-05  8:24 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Linus Torvalds, Matthias Kaehlcke, Arnd Bergmann,
	Linux Kernel Mailing List, Thomas Gleixner, Andrew Morton,
	James Y Knight, Chandler Carruth, Stephen Hines,
	Nick Desaulniers, Kees Cook, Guenter Roeck, Greg Hackmann,
	Greg Kroah-Hartman

On Thu, Apr 05, 2018 at 10:04:46AM +0200, Ingo Molnar wrote:
>    http://www.agner.org/optimize/instruction_tables.pdf
> 
> The SkyLake costs for 'BT', 'AND' and 'TEST' variants are:
> 
>                   BT             m,i               2               2         p06 p23                   0.5

>                 TEST           m,r/i               1               2       p0156 p23          1        0.5

These two I would imagine (I tend to forget about the TEST instruction).
And while they're of equal speed, TEST has more ports available if I
read that right. But yes, on SKL it doesn't matter much.

But if you go back in history (a lot) then you'll find BT being far more
expensive than TEST.

On the original Pentium for example TEST-m,r/i is 2 cycles, but BT-m,i
is 4-9 cycles.

But yes, going by the tables that's all hysterical raisins, modern cores
don't much care.

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [GIT PULL] x86/build changes for v4.17
  2018-04-05  7:08                       ` Peter Zijlstra
@ 2018-04-05 16:21                         ` James Y Knight
  0 siblings, 0 replies; 52+ messages in thread
From: James Y Knight @ 2018-04-05 16:21 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Linus Torvalds, gregkh, Nick Desaulniers, mka, Ingo Molnar,
	Linux Kernel Mailing List, tglx, Andrew Morton, Chandler Carruth,
	Stephen Hines, Kees Cook, groeck, Greg Hackmann

On Thu, Apr 5, 2018 at 3:08 AM Peter Zijlstra <peterz@infradead.org> wrote:

> On Wed, Apr 04, 2018 at 10:21:05PM +0000, James Y Knight wrote:
> > But allowing random pointer arithmetic, and pointer arithmetic
wraparound,
> > is still different than asserting that an object _field access_ can
> > overflow. Clang does not believe that can happen -- it assumes that an
> > object will still be contiguous. And that's why the llist stuff used to
be
> > broken, before it was corrected to do simply do math on a uintptr_t
(which
> > is a nice and simple and sane fix!).

> That 'fix' wasn't anything simple, I recently ran into that
> member_address_is_nonnull() trainwreck and had to think real hard wtf it
> was about.

I agree the comment there could be clearer. You could replace it with
something like this (apologies: this patch is likely going to be mangled by
gmail's plaintext mode hard-wrapping it.)

diff --git a/include/linux/llist.h b/include/linux/llist.h
index 85abc2915e8d..04e972a0bbe8 100644
--- a/include/linux/llist.h
+++ b/include/linux/llist.h
@@ -99,12 +99,15 @@ static inline void init_llist_head(struct llist_head
*list)
   *
   * This macro is conceptually the same as
   * &ptr->member != NULL
- * but it works around the fact that compilers can decide that taking a
member
- * address is never a NULL pointer.
- *
- * Real objects that start at a high address and have a member at NULL are
- * unlikely to exist, but such pointers may be returned e.g. by the
- * container_of() macro.
+ * except that it uses addition on a uintptr_t instead of member
+ * access syntax. This avoids running into a compiler assumption that
+ * objects must be contiguous in memory, and therefore that member
+ * address lookup cannot wrap, and therefore that a field with a
+ * positive offset within an object can never be at address 0.
+ *
+ * Real objects which start at a high address and have a member at
+ * NULL do not exist, but such a pointer is the result of applying
+ * container_of() to NULL, which llist_for_each_entry does.
   */
  #define member_address_is_nonnull(ptr, member) \
   ((uintptr_t)(ptr) + offsetof(typeof(*(ptr)), member) != 0)

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [GIT PULL] x86/build changes for v4.17
  2018-04-05  7:24                               ` Peter Zijlstra
  2018-04-05  8:04                                 ` Ingo Molnar
@ 2018-04-05 16:43                                 ` Linus Torvalds
  1 sibling, 0 replies; 52+ messages in thread
From: Linus Torvalds @ 2018-04-05 16:43 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Matthias Kaehlcke, Arnd Bergmann, Ingo Molnar,
	Linux Kernel Mailing List, Thomas Gleixner, Andrew Morton,
	James Y Knight, Chandler Carruth, Stephen Hines,
	Nick Desaulniers, Kees Cook, Guenter Roeck, Greg Hackmann,
	Greg Kroah-Hartman

On Thu, Apr 5, 2018 at 12:24 AM, Peter Zijlstra <peterz@infradead.org> wrote:
>
> I always assumed BT was a more expensive instruction than AND with
> immediate.

Oh, absolutely. That's why we do all those "depending on immediate or not".

The reason I brought that case up is that "test_bit()" and "set_bit()"
do this "is it constant" test COMPLETELY DIFFERENTLY.

The test_bit() one is arguably much more legible, and easier to
understand. And it so happens that clang will see that it's constant
because it's a macro (well, unless that macro is then used in an
inline function).

The set_bit() pattern looks completely different, and doesn't have
that abstraction of "constant_set_bit()" vs "variable_set_bit()", like
test_bit() does.

THAT was why I pointed it out - we do different things otherwise
similar operations.

Not because it would be odd that we do different things for the
"constant bit number" vs "variable bit number".

               Linus

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [GIT PULL] x86/build changes for v4.17
  2018-04-05  7:20                             ` Peter Zijlstra
@ 2018-04-05 17:46                               ` James Y Knight
  2018-04-05 18:06                                 ` Linus Torvalds
  2018-04-05 17:47                               ` James Y Knight
  1 sibling, 1 reply; 52+ messages in thread
From: James Y Knight @ 2018-04-05 17:46 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: mka, Linus Torvalds, arnd, Ingo Molnar,
	Linux Kernel Mailing List, tglx, Andrew Morton, Chandler Carruth,
	Stephen Hines, Nick Desaulniers, Kees Cook, groeck,
	Greg Hackmann, gregkh

[-- Attachment #1: Type: text/plain, Size: 1480 bytes --]

On Thu, Apr 5, 2018 at 3:20 AM Peter Zijlstra <peterz@infradead.org> wrote:

> On Wed, Apr 04, 2018 at 04:31:11PM -0700, Matthias Kaehlcke wrote:
>
> > From some experiments it looks like clang, in difference to gcc, does
> > not treat constant values passed as parameters to inline function as
> > constants.
>
> Then you're also missing a heap of optimizations in code like
> rb_erase_augmented() which is specifically constructed to take advantage
> of constant propagation like that.
>
> Other sites where we expect that to happen is __mutex_lock_common(),
> __update_load_sum() and a bunch of others. There isn't strictly a bug
> here, but not doing that constant propagation will still result in shit
> code gen.
>

I think maybe you're confused; those functions do not appear to use
__builtin_constant_p, which is the issue at hand. Clang's optimizer is of
course not a complete joke...it can perfectly well optimize functions after
inlining in order to not generate "shit code gen".

GCC, however, mixes up the concept of a C "constant expression" with the
results of running optimization passes such as inlining for its
definition/implementation of __builtin_constant_p. Clang does not, and
quite likely will not ever, do that.

That said, I do believe there are ongoing discussions as to how to best
provide a useful alternative which is less semantically strange, and not
too difficult for to conditionally adopt for a gcc/clang-compatible
codebase such as the kernel.

[-- Attachment #2: Type: text/html, Size: 1882 bytes --]

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [GIT PULL] x86/build changes for v4.17
  2018-04-05  7:20                             ` Peter Zijlstra
  2018-04-05 17:46                               ` James Y Knight
@ 2018-04-05 17:47                               ` James Y Knight
  1 sibling, 0 replies; 52+ messages in thread
From: James Y Knight @ 2018-04-05 17:47 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: mka, Linus Torvalds, arnd, Ingo Molnar,
	Linux Kernel Mailing List, tglx, Andrew Morton, Chandler Carruth,
	Stephen Hines, Nick Desaulniers, Kees Cook, groeck,
	Greg Hackmann, gregkh

I think maybe you're confused; those functions do not appear to use
__builtin_constant_p, which is the issue at hand. Clang's optimizer is of
course not a complete joke...it can perfectly well optimize functions after
inlining in order to not generate "shit code gen".

GCC, however, mixes up the concept of a C "constant expression" with the
results of running optimization passes such as inlining for its
definition/implementation of __builtin_constant_p. Clang does not, and
quite likely will not ever, do that.

That said, I do believe there are ongoing discussions as to how to best
provide a useful alternative which is less semantically strange, and not
too difficult for to conditionally adopt for a gcc/clang-compatible
codebase such as the kernel.

On Thu, Apr 5, 2018 at 3:20 AM Peter Zijlstra <peterz@infradead.org> wrote:

> On Wed, Apr 04, 2018 at 04:31:11PM -0700, Matthias Kaehlcke wrote:

> > From some experiments it looks like clang, in difference to gcc, does
> > not treat constant values passed as parameters to inline function as
> > constants.

> Then you're also missing a heap of optimizations in code like
> rb_erase_augmented() which is specifically constructed to take advantage
> of constant propagation like that.

> Other sites where we expect that to happen is __mutex_lock_common(),
> __update_load_sum() and a bunch of others. There isn't strictly a bug
> here, but not doing that constant propagation will still result in shit
> code gen.

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [GIT PULL] x86/build changes for v4.17
  2018-04-05 17:46                               ` James Y Knight
@ 2018-04-05 18:06                                 ` Linus Torvalds
  2018-04-05 20:51                                   ` James Y Knight
  0 siblings, 1 reply; 52+ messages in thread
From: Linus Torvalds @ 2018-04-05 18:06 UTC (permalink / raw)
  To: James Y Knight
  Cc: Peter Zijlstra, Matthias Kaehlcke, Arnd Bergmann, Ingo Molnar,
	Linux Kernel Mailing List, Thomas Gleixner, Andrew Morton,
	Chandler Carruth, Stephen Hines, Nick Desaulniers, Kees Cook,
	Guenter Roeck, Greg Hackmann, Greg Kroah-Hartman

()() ha

On Thu, Apr 5, 2018 at 10:46 AM, James Y Knight <jyknight@google.com> wrote:
>
> GCC, however, mixes up the concept of a C "constant expression" with the
> results of running optimization passes such as inlining for its
> definition/implementation of __builtin_constant_p. Clang does not, and quite
> likely will not ever, do that.

Nothing has ever said that "__builtin_constant_p(x)" means "is x an
integer constant expression".

So there is no mix-up - or rather, *you* are the one mixing things up.

The gcc documentation says:

   "You can use the built-in function __builtin_constant_p to
determine if a value is known to be constant at compile time [...]"

Note that this has *nothing* to do with the concept of C "integer
constant expressions".

Yes, integer constant expressions obviously have values that are known
to be constant at compile time. But *other* things have values that
are known to be constant at compile time too.

In fact, we went through a long and painful thing exactly because we
wanted to get that "is this an ICE" value, and it turns out that the
best way to get that value has nothing to do with any gcc builtin at
all, but looks like this:

  /* Glory to Martin Uecker <Martin.Uecker@med.uni-goettingen.de> *
  #define __is_constexpr(x) \
        (sizeof(int) == sizeof(*(8 ? ((void *)((long)(x) * 0l)) : (int *)8)))

which is actually impressively subtle, and almost entirely standard C
(the only real dependency is "sizeof(void)", and it not being the same
as "sizeof(int)").

So honestly, if your "__builtin_constant_p(x)" is just "is 'x' just a
C integer constant expression", then your __builtin_constant_p() is
not only not compatible with gcc, it isn't really even all that
useful. We can do it by hand without using a builtin at all.

So if any clang person thinks that it's gcc that is mixing up
concepts, you guys need to re-consider. It is simply not true - and
*should* not be true - that __builtin_constant_ps anything to do with
the "C integer constant expression".

It needs to go inside inline functions. That's how we use it, and we
often have reasons why it practically has to. Macro argument
evaluation rules (and lack of type handling) makes it too painful to
use macros everywhere.

And even when the __builtin_constant_p itself is in a macro, the macro
is then often used by inline functions, so..

                  Linus

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [GIT PULL] x86/build changes for v4.17
  2018-04-05 18:06                                 ` Linus Torvalds
@ 2018-04-05 20:51                                   ` James Y Knight
  2018-04-05 21:13                                     ` Linus Torvalds
  0 siblings, 1 reply; 52+ messages in thread
From: James Y Knight @ 2018-04-05 20:51 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: Peter Zijlstra, mka, arnd, Ingo Molnar,
	Linux Kernel Mailing List, tglx, Andrew Morton, Chandler Carruth,
	Stephen Hines, Nick Desaulniers, Kees Cook, groeck,
	Greg Hackmann, gregkh

On Thu, Apr 5, 2018 at 2:06 PM Linus Torvalds
<torvalds@linux-foundation.org>
wrote:
> On Thu, Apr 5, 2018 at 10:46 AM, James Y Knight <jyknight@google.com>
wrote:
> >
> > GCC, however, mixes up the concept of a C "constant expression" with the
> > results of running optimization passes such as inlining for its
> > definition/implementation of __builtin_constant_p. Clang does not, and
quite
> > likely will not ever, do that.

> Nothing has ever said that "__builtin_constant_p(x)" means "is x an
> integer constant expression"

I had actually meant that the __builtin_constant_p **itself** had to be a
constant expression, not that its *argument* must be an I-C-E for
__builtin_constant_p to return true.

But after spending some time on further investigating in order to show an
example of how this matters, I must take back my words. I was mistaken
about GCC's semantics.

Take this example:
===
int function(void);
void useval(int*);

int f() {
     int v = 1 + 2;
     int array[2][__builtin_constant_p(v) ? 1 : 100];
     useval(array[0]);
     return sizeof(array[function()]) / sizeof(array[0]);
}
===

Build with "gcc -O -std=c99":
===
f:
         subq    $24, %rsp
         leaq    8(%rsp), %rdi
         call    useval
         call    function
         movl    $4, %eax
         addq    $24, %rsp
         ret
===

Note the fact that "function" is actually *called* indicates that 'array'
is a VLA (...and that C99's sizeof(VLA) semantics are bonkers, but that's
another story...).

Which means that __builtin_constant_p(v) was _not_ evaluated as an integer
constant expression by GCC. Instead, it was left as an expression. And, the
stack frame being only 24 bytes indicates that the __bcp eventually
evaluated to true.

I actually think this actually _is_ something clang can implement. Thanks
for making me try to prove myself. :)

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [GIT PULL] x86/build changes for v4.17
  2018-04-05 20:51                                   ` James Y Knight
@ 2018-04-05 21:13                                     ` Linus Torvalds
  2018-04-05 22:51                                       ` James Y Knight
  0 siblings, 1 reply; 52+ messages in thread
From: Linus Torvalds @ 2018-04-05 21:13 UTC (permalink / raw)
  To: James Y Knight
  Cc: Peter Zijlstra, Matthias Kaehlcke, Arnd Bergmann, Ingo Molnar,
	Linux Kernel Mailing List, Thomas Gleixner, Andrew Morton,
	Chandler Carruth, Stephen Hines, Nick Desaulniers, Kees Cook,
	Guenter Roeck, Greg Hackmann, Greg Kroah-Hartman

On Thu, Apr 5, 2018 at 1:51 PM, James Y Knight <jyknight@google.com> wrote:
>
> I had actually meant that the __builtin_constant_p **itself** had to be a
> constant expression, not that its *argument* must be an I-C-E for
> __builtin_constant_p to return true.

I actually really wish that were true, and that it would always be
considered an ICE.

But it's not, as you also found out.

This exact problem is why we had to come up with that crazy
alternative test for an actual integer constant expression.

> Which means that __builtin_constant_p(v) was _not_ evaluated as an integer
> constant expression by GCC. Instead, it was left as an expression. And, the
> stack frame being only 24 bytes indicates that the __bcp eventually
> evaluated to true.

Yeah.

The best of all worlds would be that __builtin_constant_p() would
itself always evaluate as an integer constant expression, but the
expression inside of it could be expanded as if it was not.

I realize that that can be very inconvenient for a compiler, since the
two are basically done at different points in the evaluation, but it's
the nicest semantics for the user.

But since gcc doesn't actually provide those semantics, clearly clang
doesn't have to either.

I claim that it *could* be done right, though, if you happened to care
about giving the best possible quality end result to the user.

Instead of doing the integer constant expression testing early (before
inlining etc), you do it later, but you carry along a bit in the
expression that says "was this expression actually _syntactically_ an
I-C-E?"

And btw, I hate how stupid gcc is about "constant size arrays but acts
as a VLA because it wasn't an integer-constant-expression size"
things.

Your code generation example really is a sad sad example of it. A good
optimizer should have generated the same code even if the stupid array
again syntactically was VLA, because it damn well isn't in reality.

So if you get really excited about this, and decide that clang can do
much better than gcc, I can only salute you!

             Linus

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [GIT PULL] x86/build changes for v4.17
  2018-04-05 21:13                                     ` Linus Torvalds
@ 2018-04-05 22:51                                       ` James Y Knight
  2018-04-06  2:02                                         ` Linus Torvalds
  0 siblings, 1 reply; 52+ messages in thread
From: James Y Knight @ 2018-04-05 22:51 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: Peter Zijlstra, mka, Arnd Bergmann, Ingo Molnar,
	Linux Kernel Mailing List, tglx, Andrew Morton, Chandler Carruth,
	Stephen Hines, Nick Desaulniers, Kees Cook, groeck,
	Greg Hackmann, gregkh

On Thu, Apr 5, 2018 at 5:13 PM Linus Torvalds
<torvalds@linux-foundation.org> wrote:
> And btw, I hate how stupid gcc is about "constant size arrays but acts
> as a VLA because it wasn't an integer-constant-expression size"
> things.

> Your code generation example really is a sad sad example of it. A good
> optimizer should have generated the same code even if the stupid array
> again syntactically was VLA, because it damn well isn't in reality.

Unfortunately, that behavior is required by the standard, it's not up to
compiler optimization to change. Note that the optimizer in my example
_did_ determine that the VLA had a constant size, and generated
constant-size stack adjustment code. And it knew the result of the
sizeof(), and put the value "4" straight into the return register. The only
difference in the codegen from a non-VLA is the difference which was
required by the language standard -- the useless call to "function".

Note that the return value of the call is unused. And in fact, there is
literally no reason for the expression in "sizeof(expression)" to ever be
evaluated -- the result of the evaluation can _never_ be used! And, yet,
the C99 standard requires that it is evaluated, regardless, when the
resulting type of the expression is a VLA type. I have no idea why....

 From C99 6.5.3.4 "The sizeof operator", paragraph 2:
"""
    The sizeof operator yields the size (in bytes) of its operand, which may
be an expression or the parenthesized name of a type. The size is
determined from the type of the operand. The result is an integer. If the
type of the operand is a variable length array type, the operand is
evaluated; otherwise, the operand is not evaluated and the result is an
integer constant.
"""

(C11 says the same thing.)

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [GIT PULL] x86/build changes for v4.17
  2018-04-05 22:51                                       ` James Y Knight
@ 2018-04-06  2:02                                         ` Linus Torvalds
  0 siblings, 0 replies; 52+ messages in thread
From: Linus Torvalds @ 2018-04-06  2:02 UTC (permalink / raw)
  To: James Y Knight
  Cc: Peter Zijlstra, Matthias Kaehlcke, Arnd Bergmann, Ingo Molnar,
	Linux Kernel Mailing List, Thomas Gleixner, Andrew Morton,
	Chandler Carruth, Stephen Hines, Nick Desaulniers, Kees Cook,
	Guenter Roeck, Greg Hackmann, Greg Kroah-Hartman

On Thu, Apr 5, 2018 at 3:51 PM, James Y Knight <jyknight@google.com> wrote:
>
> Unfortunately, that behavior is required by the standard, it's not up to
> compiler optimization to change.

I actually mis-read your example - in your case it obviously does pass
the array itself down to the call, and yes, it obviously needs to be
allocated.

I had a test-case at one point where gcc avoided the stack allocation
entirely for a regular array, but not for a VLA (of the same constant
size) because the VLA logic is apparently different enough - even when
the size of the array is a compile-time constant.

We had that issue because we had a lot of trouble coming up with a
"max()" macro that was still an I-C-E (and we had a number of array
sizes that used "max()"). So all the array sizes were compile-time
constants, they just weren't traditional C arrays.

But now I can't recreate the thing. Maybe I had screwed up in my
test-case somehow.

               Linus

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [GIT PULL] x86/build changes for v4.17
  2018-04-04 19:32               ` Josh Poimboeuf
@ 2018-06-07 19:23                 ` Nick Desaulniers
  2018-06-07 20:11                   ` Greg KH
  0 siblings, 1 reply; 52+ messages in thread
From: Nick Desaulniers @ 2018-06-07 19:23 UTC (permalink / raw)
  To: Josh Poimboeuf
  Cc: Greg KH, Matthias Kaehlcke, Ingo Molnar, Peter Zijlstra,
	Linus Torvalds, LKML, Thomas Gleixner, Andrew Morton,
	James Y Knight, Chandler Carruth, Stephen Hines, Kees Cook,
	groeck, Greg Hackmann

On Wed, Apr 4, 2018 at 12:32 PM Josh Poimboeuf <jpoimboe@redhat.com> wrote:
>
> On Wed, Apr 04, 2018 at 04:53:52PM +0000, Nick Desaulniers wrote:
> > (re-sending as plain text)
> >
> > On Wed, Apr 4, 2018 at 2:38 AM Greg KH <gregkh@linuxfoundation.org> wrote:
> > > There are known-bugs with building a kernel with clang right now (I
> > > pointed one out a few days ago about NULL checks being deleted from the
> > > clang output for no good reason, which really is scary for obvious
> > > reasons).
> >
> > Is this the thread you are referring to?
> > https://lkml.org/lkml/2018/3/27/1286
> >
> > It's definitely something curious that I'll need to sit down and
> > investigate more.  If there are other known instances, it would be good to
> > let me know.
>
> As Matthias mentioned elsewhere, it sounds like they're planning to
> implement -fno-delete-null-pointer-checks, which would presumably fix
> the above issue.

Just to follow this up, -fno-delete-null-pointer-checks is being added
to clang in:
https://reviews.llvm.org/D47894
https://reviews.llvm.org/D47895

-- 
Thanks,
~Nick Desaulniers

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [GIT PULL] x86/build changes for v4.17
  2018-06-07 19:23                 ` Nick Desaulniers
@ 2018-06-07 20:11                   ` Greg KH
  0 siblings, 0 replies; 52+ messages in thread
From: Greg KH @ 2018-06-07 20:11 UTC (permalink / raw)
  To: Nick Desaulniers
  Cc: Josh Poimboeuf, Matthias Kaehlcke, Ingo Molnar, Peter Zijlstra,
	Linus Torvalds, LKML, Thomas Gleixner, Andrew Morton,
	James Y Knight, Chandler Carruth, Stephen Hines, Kees Cook,
	groeck, Greg Hackmann

On Thu, Jun 07, 2018 at 12:23:31PM -0700, Nick Desaulniers wrote:
> On Wed, Apr 4, 2018 at 12:32 PM Josh Poimboeuf <jpoimboe@redhat.com> wrote:
> >
> > On Wed, Apr 04, 2018 at 04:53:52PM +0000, Nick Desaulniers wrote:
> > > (re-sending as plain text)
> > >
> > > On Wed, Apr 4, 2018 at 2:38 AM Greg KH <gregkh@linuxfoundation.org> wrote:
> > > > There are known-bugs with building a kernel with clang right now (I
> > > > pointed one out a few days ago about NULL checks being deleted from the
> > > > clang output for no good reason, which really is scary for obvious
> > > > reasons).
> > >
> > > Is this the thread you are referring to?
> > > https://lkml.org/lkml/2018/3/27/1286
> > >
> > > It's definitely something curious that I'll need to sit down and
> > > investigate more.  If there are other known instances, it would be good to
> > > let me know.
> >
> > As Matthias mentioned elsewhere, it sounds like they're planning to
> > implement -fno-delete-null-pointer-checks, which would presumably fix
> > the above issue.
> 
> Just to follow this up, -fno-delete-null-pointer-checks is being added
> to clang in:
> https://reviews.llvm.org/D47894
> https://reviews.llvm.org/D47895

That's great news, thanks for letting us know.

greg k-h

^ permalink raw reply	[flat|nested] 52+ messages in thread

end of thread, other threads:[~2018-06-07 20:11 UTC | newest]

Thread overview: 52+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-04-02  9:50 [GIT PULL] x86/build changes for v4.17 Ingo Molnar
2018-04-02 21:44 ` Linus Torvalds
2018-04-02 22:38   ` Matthias Kaehlcke
2018-04-03  1:26     ` Matthias Kaehlcke
2018-04-03  8:59   ` Peter Zijlstra
2018-04-03  9:51     ` Ingo Molnar
2018-04-03 12:09       ` Peter Zijlstra
2018-04-03 18:06       ` Matthias Kaehlcke
2018-04-03 21:58         ` Nick Desaulniers
2018-04-04  9:19           ` Peter Zijlstra
2018-04-04  9:38           ` Greg KH
2018-04-04 16:49             ` Nick Desaulniers
2018-04-04 17:13               ` Linus Torvalds
2018-04-04 17:46                 ` Nick Desaulniers
2018-04-04 23:10                 ` Nick Desaulniers
2018-04-04 16:53             ` Nick Desaulniers
2018-04-04 16:59               ` Greg KH
2018-04-04 19:26                 ` James Y Knight
2018-04-04 19:42                   ` Linus Torvalds
2018-04-04 22:21                     ` James Y Knight
2018-04-04 22:29                       ` Linus Torvalds
2018-04-05  7:08                       ` Peter Zijlstra
2018-04-05 16:21                         ` James Y Knight
2018-04-04 19:32               ` Josh Poimboeuf
2018-06-07 19:23                 ` Nick Desaulniers
2018-06-07 20:11                   ` Greg KH
2018-04-04  9:30         ` Peter Zijlstra
2018-04-04 19:17           ` Matthias Kaehlcke
2018-04-04 20:33             ` Arnd Bergmann
2018-04-04 20:58               ` Matthias Kaehlcke
2018-04-04 21:11                 ` Arnd Bergmann
2018-04-04 21:46                   ` Matthias Kaehlcke
2018-04-04 21:59                     ` Linus Torvalds
2018-04-04 22:17                       ` Matthias Kaehlcke
2018-04-04 22:39                         ` Linus Torvalds
2018-04-04 23:31                           ` Matthias Kaehlcke
2018-04-05  0:05                             ` Linus Torvalds
2018-04-05  0:20                               ` Kees Cook
2018-04-05  7:24                               ` Peter Zijlstra
2018-04-05  8:04                                 ` Ingo Molnar
2018-04-05  8:24                                   ` Peter Zijlstra
2018-04-05 16:43                                 ` Linus Torvalds
2018-04-05  7:20                             ` Peter Zijlstra
2018-04-05 17:46                               ` James Y Knight
2018-04-05 18:06                                 ` Linus Torvalds
2018-04-05 20:51                                   ` James Y Knight
2018-04-05 21:13                                     ` Linus Torvalds
2018-04-05 22:51                                       ` James Y Knight
2018-04-06  2:02                                         ` Linus Torvalds
2018-04-05 17:47                               ` James Y Knight
2018-04-04 23:04             ` Nick Desaulniers
2018-04-03 17:36     ` Linus Torvalds

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).