LKML Archive on lore.kernel.org
help / color / mirror / Atom feed
* [git pull] x86 fixes
@ 2008-11-06 21:29 Ingo Molnar
  0 siblings, 0 replies; 539+ messages in thread
From: Ingo Molnar @ 2008-11-06 21:29 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Andrew Morton, Thomas Gleixner, H. Peter Anvin

Linus,

Please pull the latest x86-fixes-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip.git x86-fixes-for-linus

 Thanks,

	Ingo

------------------>
Alok Kataria (1):
      x86: don't use tsc_khz to calculate lpj if notsc is passed

Ben Hutchings (1):
      x86: don't allow nr_irqs > NR_IRQS

Bjorn Helgaas (1):
      x86: mention ACPI in top-level Kconfig menu

Eduardo Habkost (1):
      Revert "x86: default to reboot via ACPI"

Hugh Dickins (1):
      x86: align DirectMap in /proc/meminfo

James Bottomley (1):
      x86, voyager: fix smp_intr_init() compile breakage

Joerg Roedel (2):
      AMD IOMMU: fix detection of NP capable IOMMUs
      AMD IOMMU: fix lazy IO/TLB flushing in unmap path

Mark McLoughlin (1):
      x86/docs: remove noirqbalance param docs

Suresh Siddha (1):
      x86: add smp_mb() before sending INVALIDATE_TLB_VECTOR

Yinghai Lu (2):
      x86: size NR_IRQS on 32-bit systems the same way as 64-bit
      x86: remove VISWS and PARAVIRT around NR_IRQS puzzle


 Documentation/kernel-parameters.txt |    2 --
 arch/x86/Kconfig                    |    2 +-
 arch/x86/include/asm/irq_vectors.h  |   20 ++++++--------------
 arch/x86/include/asm/voyager.h      |    1 +
 arch/x86/kernel/amd_iommu.c         |    9 +++++++--
 arch/x86/kernel/io_apic.c           |    2 ++
 arch/x86/kernel/reboot.c            |    6 +-----
 arch/x86/kernel/tlb_32.c            |    6 ++++++
 arch/x86/kernel/tlb_64.c            |    5 +++++
 arch/x86/kernel/tsc.c               |    8 ++++----
 arch/x86/mach-voyager/setup.c       |    2 +-
 arch/x86/mach-voyager/voyager_smp.c |    2 +-
 arch/x86/mm/pageattr.c              |    8 ++++----
 13 files changed, 39 insertions(+), 34 deletions(-)

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 1bbcaa8..de4de3e 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1470,8 +1470,6 @@ and is between 256 and 4096 characters. It is defined in the file
 			Valid arguments: on, off
 			Default: on
 
-	noirqbalance	[X86-32,SMP,KNL] Disable kernel irq balancing
-
 	noirqdebug	[X86-32] Disables the code which attempts to detect and
 			disable unhandled interrupt sources.
 
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 6f20718..5d6aa40 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1494,7 +1494,7 @@ config HAVE_ARCH_EARLY_PFN_TO_NID
 	def_bool X86_64
 	depends on NUMA
 
-menu "Power management options"
+menu "Power management and ACPI options"
 	depends on !X86_VOYAGER
 
 config ARCH_HIBERNATION_HEADER
diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h
index d843ed0..0005adb 100644
--- a/arch/x86/include/asm/irq_vectors.h
+++ b/arch/x86/include/asm/irq_vectors.h
@@ -101,30 +101,22 @@
 #define LAST_VM86_IRQ		15
 #define invalid_vm86_irq(irq)	((irq) < 3 || (irq) > 15)
 
-#ifdef CONFIG_X86_64
+#if defined(CONFIG_X86_IO_APIC) && !defined(CONFIG_X86_VOYAGER)
 # if NR_CPUS < MAX_IO_APICS
 #  define NR_IRQS (NR_VECTORS + (32 * NR_CPUS))
 # else
 #  define NR_IRQS (NR_VECTORS + (32 * MAX_IO_APICS))
 # endif
 
-#elif !defined(CONFIG_X86_VOYAGER)
+#elif defined(CONFIG_X86_VOYAGER)
 
-# if defined(CONFIG_X86_IO_APIC) || defined(CONFIG_PARAVIRT) || defined(CONFIG_X86_VISWS)
-
-#  define NR_IRQS		224
-
-# else /* IO_APIC || PARAVIRT */
-
-#  define NR_IRQS		16
-
-# endif
+# define NR_IRQS		224
 
-#else /* !VISWS && !VOYAGER */
+#else /* IO_APIC || VOYAGER */
 
-# define NR_IRQS		224
+# define NR_IRQS		16
 
-#endif /* VISWS */
+#endif
 
 /* Voyager specific defines */
 /* These define the CPIs we use in linux */
diff --git a/arch/x86/include/asm/voyager.h b/arch/x86/include/asm/voyager.h
index 9c811d2..b3e6473 100644
--- a/arch/x86/include/asm/voyager.h
+++ b/arch/x86/include/asm/voyager.h
@@ -520,6 +520,7 @@ extern void voyager_restart(void);
 extern void voyager_cat_power_off(void);
 extern void voyager_cat_do_common_interrupt(void);
 extern void voyager_handle_nmi(void);
+extern void voyager_smp_intr_init(void);
 /* Commands for the following are */
 #define	VOYAGER_PSI_READ	0
 #define VOYAGER_PSI_WRITE	1
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index a8fd9eb..331b318 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -50,7 +50,7 @@ static int dma_ops_unity_map(struct dma_ops_domain *dma_dom,
 /* returns !0 if the IOMMU is caching non-present entries in its TLB */
 static int iommu_has_npcache(struct amd_iommu *iommu)
 {
-	return iommu->cap & IOMMU_CAP_NPCACHE;
+	return iommu->cap & (1UL << IOMMU_CAP_NPCACHE);
 }
 
 /****************************************************************************
@@ -536,6 +536,9 @@ static void dma_ops_free_addresses(struct dma_ops_domain *dom,
 {
 	address >>= PAGE_SHIFT;
 	iommu_area_free(dom->bitmap, address, pages);
+
+	if (address + pages >= dom->next_bit)
+		dom->need_flush = true;
 }
 
 /****************************************************************************
@@ -992,8 +995,10 @@ static void __unmap_single(struct amd_iommu *iommu,
 
 	dma_ops_free_addresses(dma_dom, dma_addr, pages);
 
-	if (amd_iommu_unmap_flush)
+	if (amd_iommu_unmap_flush || dma_dom->need_flush) {
 		iommu_flush_pages(iommu, dma_dom->domain.id, dma_addr, size);
+		dma_dom->need_flush = false;
+	}
 }
 
 /*
diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c
index b764d74..7a3f202 100644
--- a/arch/x86/kernel/io_apic.c
+++ b/arch/x86/kernel/io_apic.c
@@ -3611,6 +3611,8 @@ int __init probe_nr_irqs(void)
 	/* something wrong ? */
 	if (nr < nr_min)
 		nr = nr_min;
+	if (WARN_ON(nr > NR_IRQS))
+		nr = NR_IRQS;
 
 	return nr;
 }
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index f4c93f1..724adfc 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -29,11 +29,7 @@ EXPORT_SYMBOL(pm_power_off);
 
 static const struct desc_ptr no_idt = {};
 static int reboot_mode;
-/*
- * Keyboard reset and triple fault may result in INIT, not RESET, which
- * doesn't work when we're in vmx root mode.  Try ACPI first.
- */
-enum reboot_type reboot_type = BOOT_ACPI;
+enum reboot_type reboot_type = BOOT_KBD;
 int reboot_force;
 
 #if defined(CONFIG_X86_32) && defined(CONFIG_SMP)
diff --git a/arch/x86/kernel/tlb_32.c b/arch/x86/kernel/tlb_32.c
index e00534b..f4049f3 100644
--- a/arch/x86/kernel/tlb_32.c
+++ b/arch/x86/kernel/tlb_32.c
@@ -154,6 +154,12 @@ void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm,
 	flush_mm = mm;
 	flush_va = va;
 	cpus_or(flush_cpumask, cpumask, flush_cpumask);
+
+	/*
+	 * Make the above memory operations globally visible before
+	 * sending the IPI.
+	 */
+	smp_mb();
 	/*
 	 * We have to send the IPI only to
 	 * CPUs affected.
diff --git a/arch/x86/kernel/tlb_64.c b/arch/x86/kernel/tlb_64.c
index dcbf7a1..8f919ca 100644
--- a/arch/x86/kernel/tlb_64.c
+++ b/arch/x86/kernel/tlb_64.c
@@ -183,6 +183,11 @@ void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm,
 	cpus_or(f->flush_cpumask, cpumask, f->flush_cpumask);
 
 	/*
+	 * Make the above memory operations globally visible before
+	 * sending the IPI.
+	 */
+	smp_mb();
+	/*
 	 * We have to send the IPI only to
 	 * CPUs affected.
 	 */
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 62348e4..2ef80e3 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -813,10 +813,6 @@ void __init tsc_init(void)
 		cpu_khz = calibrate_cpu();
 #endif
 
-	lpj = ((u64)tsc_khz * 1000);
-	do_div(lpj, HZ);
-	lpj_fine = lpj;
-
 	printk("Detected %lu.%03lu MHz processor.\n",
 			(unsigned long)cpu_khz / 1000,
 			(unsigned long)cpu_khz % 1000);
@@ -836,6 +832,10 @@ void __init tsc_init(void)
 	/* now allow native_sched_clock() to use rdtsc */
 	tsc_disabled = 0;
 
+	lpj = ((u64)tsc_khz * 1000);
+	do_div(lpj, HZ);
+	lpj_fine = lpj;
+
 	use_tsc_delay();
 	/* Check and install the TSC clocksource */
 	dmi_check_system(bad_tsc_dmi_table);
diff --git a/arch/x86/mach-voyager/setup.c b/arch/x86/mach-voyager/setup.c
index 6bbdd63..a580b95 100644
--- a/arch/x86/mach-voyager/setup.c
+++ b/arch/x86/mach-voyager/setup.c
@@ -27,7 +27,7 @@ static struct irqaction irq2 = {
 void __init intr_init_hook(void)
 {
 #ifdef CONFIG_SMP
-	smp_intr_init();
+	voyager_smp_intr_init();
 #endif
 
 	setup_irq(2, &irq2);
diff --git a/arch/x86/mach-voyager/voyager_smp.c b/arch/x86/mach-voyager/voyager_smp.c
index 7f4c6af..0e33165 100644
--- a/arch/x86/mach-voyager/voyager_smp.c
+++ b/arch/x86/mach-voyager/voyager_smp.c
@@ -1258,7 +1258,7 @@ static void handle_vic_irq(unsigned int irq, struct irq_desc *desc)
 #define QIC_SET_GATE(cpi, vector) \
 	set_intr_gate((cpi) + QIC_DEFAULT_CPI_BASE, (vector))
 
-void __init smp_intr_init(void)
+void __init voyager_smp_intr_init(void)
 {
 	int i;
 
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index f1dc1b7..e89d248 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -67,18 +67,18 @@ static void split_page_count(int level)
 
 void arch_report_meminfo(struct seq_file *m)
 {
-	seq_printf(m, "DirectMap4k:  %8lu kB\n",
+	seq_printf(m, "DirectMap4k:    %8lu kB\n",
 			direct_pages_count[PG_LEVEL_4K] << 2);
 #if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
-	seq_printf(m, "DirectMap2M:  %8lu kB\n",
+	seq_printf(m, "DirectMap2M:    %8lu kB\n",
 			direct_pages_count[PG_LEVEL_2M] << 11);
 #else
-	seq_printf(m, "DirectMap4M:  %8lu kB\n",
+	seq_printf(m, "DirectMap4M:    %8lu kB\n",
 			direct_pages_count[PG_LEVEL_2M] << 12);
 #endif
 #ifdef CONFIG_X86_64
 	if (direct_gbpages)
-		seq_printf(m, "DirectMap1G:  %8lu kB\n",
+		seq_printf(m, "DirectMap1G:    %8lu kB\n",
 			direct_pages_count[PG_LEVEL_1G] << 20);
 #endif
 }

^ permalink raw reply	[flat|nested] 539+ messages in thread

* Re: [GIT PULL] x86 fixes
  2023-02-11  8:59 [GIT PULL] " Ingo Molnar
@ 2023-02-11 19:24 ` pr-tracker-bot
  0 siblings, 0 replies; 539+ messages in thread
From: pr-tracker-bot @ 2023-02-11 19:24 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Linus Torvalds, linux-kernel, Thomas Gleixner, Borislav Petkov,
	Peter Zijlstra, Andrew Morton, Dave Hansen

The pull request you sent on Sat, 11 Feb 2023 09:59:20 +0100:

> git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-2023-02-11

has been merged into torvalds/linux.git:
https://git.kernel.org/torvalds/c/47e9aa14ce5abca70d6584a8d8213707d197c38e

Thank you!

-- 
Deet-doot-dot, I am a bot.
https://korg.docs.kernel.org/prtracker.html

^ permalink raw reply	[flat|nested] 539+ messages in thread

* [GIT PULL] x86 fixes
@ 2023-02-11  8:59 Ingo Molnar
  2023-02-11 19:24 ` pr-tracker-bot
  0 siblings, 1 reply; 539+ messages in thread
From: Ingo Molnar @ 2023-02-11  8:59 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Thomas Gleixner, Borislav Petkov, Peter Zijlstra,
	Andrew Morton, Dave Hansen

Linus,

Please pull the latest x86/urgent git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-2023-02-11

   # HEAD: f545e8831e70065e127f903fc7aca09aa50422c7 x86/cpu: Add Lunar Lake M

Fix a kprobes bug, plus add a new Intel model number to the
upstream <asm/intel-family.h> header for drivers to use.

 Thanks,

	Ingo

------------------>
Kan Liang (1):
      x86/cpu: Add Lunar Lake M

Nadav Amit (1):
      x86/kprobes: Fix 1 byte conditional jump target


 arch/x86/include/asm/intel-family.h | 2 ++
 arch/x86/kernel/kprobes/core.c      | 2 +-
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h
index 347707d459c6..cbaf174d8efd 100644
--- a/arch/x86/include/asm/intel-family.h
+++ b/arch/x86/include/asm/intel-family.h
@@ -123,6 +123,8 @@
 #define INTEL_FAM6_METEORLAKE		0xAC
 #define INTEL_FAM6_METEORLAKE_L		0xAA
 
+#define INTEL_FAM6_LUNARLAKE_M		0xBD
+
 /* "Small Core" Processors (Atom/E-Core) */
 
 #define INTEL_FAM6_ATOM_BONNELL		0x1C /* Diamondville, Pineview */
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
index b36f3c367cb2..695873c0f50b 100644
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -625,7 +625,7 @@ static int prepare_emulation(struct kprobe *p, struct insn *insn)
 		/* 1 byte conditional jump */
 		p->ainsn.emulate_op = kprobe_emulate_jcc;
 		p->ainsn.jcc.type = opcode & 0xf;
-		p->ainsn.rel32 = *(char *)insn->immediate.bytes;
+		p->ainsn.rel32 = insn->immediate.value;
 		break;
 	case 0x0f:
 		opcode = insn->opcode.bytes[1];

^ permalink raw reply	[flat|nested] 539+ messages in thread

* Re: [GIT PULL] x86 fixes
  2022-08-28 14:57 Ingo Molnar
@ 2022-08-28 18:18 ` pr-tracker-bot
  0 siblings, 0 replies; 539+ messages in thread
From: pr-tracker-bot @ 2022-08-28 18:18 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Linus Torvalds, linux-kernel, Thomas Gleixner, Borislav Petkov,
	Peter Zijlstra, Dave Hansen, H. Peter Anvin, x86

The pull request you sent on Sun, 28 Aug 2022 16:57:20 +0200:

> git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-2022-08-28

has been merged into torvalds/linux.git:
https://git.kernel.org/torvalds/c/2f23a7c914317ac0b2a7e2bbe48dc00213652f98

Thank you!

-- 
Deet-doot-dot, I am a bot.
https://korg.docs.kernel.org/prtracker.html

^ permalink raw reply	[flat|nested] 539+ messages in thread

* [GIT PULL] x86 fixes
@ 2022-08-28 14:57 Ingo Molnar
  2022-08-28 18:18 ` pr-tracker-bot
  0 siblings, 1 reply; 539+ messages in thread
From: Ingo Molnar @ 2022-08-28 14:57 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Thomas Gleixner, Borislav Petkov, Peter Zijlstra,
	Dave Hansen, H. Peter Anvin, x86

Linus,

Please pull the latest x86/urgent git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-2022-08-28

   # HEAD: 00da0cb385d05a89226e150a102eb49d8abb0359 Documentation/ABI: Mention retbleed vulnerability info file for sysfs

Misc fixes:

 - Fix PAT on Xen, which caused i915 driver failures
 - Fix compat INT 80 entry crash on Xen PV guests
 - Fix 'MMIO Stale Data' mitigation status reporting on older Intel CPUs
 - Fix RSB stuffing regressions
 - Fix ORC unwinding on ftrace trampolines
 - Add Intel Raptor Lake CPU model number
 - Fix (work around) a SEV-SNP bootloader bug providing bogus values in
   boot_params->cc_blob_address, by ignoring the value on !SEV-SNP bootups.
 - Fix SEV-SNP early boot failure
 - Fix the objtool list of noreturn functions and annotate snp_abort(),
   which bug confused objtool on gcc-12.
 - Fix the documentation for retbleed

 Thanks,

	Ingo

------------------>
Borislav Petkov (1):
      x86/sev: Mark snp_abort() noreturn

Chen Zhongjin (1):
      x86/unwind/orc: Unwind ftrace trampolines with correct ORC entry

Jan Beulich (1):
      x86/PAT: Have pat_enabled() properly reflect state when running on Xen

Juergen Gross (1):
      x86/entry: Fix entry_INT80_compat for Xen PV guests

Michael Roth (1):
      x86/boot: Don't propagate uninitialized boot_params->cc_blob_address

Pawan Gupta (1):
      x86/bugs: Add "unknown" reporting for MMIO Stale Data

Peter Zijlstra (2):
      x86/nospec: Unwreck the RSB stuffing
      x86/nospec: Fix i386 RSB stuffing

Salvatore Bonaccorso (1):
      Documentation/ABI: Mention retbleed vulnerability info file for sysfs

Tom Lendacky (1):
      x86/sev: Don't use cc_platform_has() for early SEV-SNP calls

Tony Luck (1):
      x86/cpu: Add new Raptor Lake CPU model number


 Documentation/ABI/testing/sysfs-devices-system-cpu |  1 +
 .../hw-vuln/processor_mmio_stale_data.rst          | 14 ++++
 arch/x86/boot/compressed/misc.h                    | 12 ++-
 arch/x86/boot/compressed/sev.c                     |  8 ++
 arch/x86/entry/entry_64_compat.S                   |  2 +-
 arch/x86/include/asm/cpufeatures.h                 |  5 +-
 arch/x86/include/asm/intel-family.h                |  2 +
 arch/x86/include/asm/nospec-branch.h               | 92 ++++++++++++----------
 arch/x86/include/asm/sev.h                         |  2 +-
 arch/x86/kernel/cpu/bugs.c                         | 14 +++-
 arch/x86/kernel/cpu/common.c                       | 42 ++++++----
 arch/x86/kernel/sev.c                              | 18 ++++-
 arch/x86/kernel/unwind_orc.c                       | 15 ++--
 arch/x86/mm/pat/memtype.c                          | 10 ++-
 tools/objtool/check.c                              | 34 ++++----
 15 files changed, 183 insertions(+), 88 deletions(-)

diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu
index 5bf61881f012..760c889b6cd1 100644
--- a/Documentation/ABI/testing/sysfs-devices-system-cpu
+++ b/Documentation/ABI/testing/sysfs-devices-system-cpu
@@ -523,6 +523,7 @@ What:		/sys/devices/system/cpu/vulnerabilities
 		/sys/devices/system/cpu/vulnerabilities/tsx_async_abort
 		/sys/devices/system/cpu/vulnerabilities/itlb_multihit
 		/sys/devices/system/cpu/vulnerabilities/mmio_stale_data
+		/sys/devices/system/cpu/vulnerabilities/retbleed
 Date:		January 2018
 Contact:	Linux kernel mailing list <linux-kernel@vger.kernel.org>
 Description:	Information about CPU vulnerabilities
diff --git a/Documentation/admin-guide/hw-vuln/processor_mmio_stale_data.rst b/Documentation/admin-guide/hw-vuln/processor_mmio_stale_data.rst
index 9393c50b5afc..c98fd11907cc 100644
--- a/Documentation/admin-guide/hw-vuln/processor_mmio_stale_data.rst
+++ b/Documentation/admin-guide/hw-vuln/processor_mmio_stale_data.rst
@@ -230,6 +230,20 @@ The possible values in this file are:
      * - 'Mitigation: Clear CPU buffers'
        - The processor is vulnerable and the CPU buffer clearing mitigation is
          enabled.
+     * - 'Unknown: No mitigations'
+       - The processor vulnerability status is unknown because it is
+	 out of Servicing period. Mitigation is not attempted.
+
+Definitions:
+------------
+
+Servicing period: The process of providing functional and security updates to
+Intel processors or platforms, utilizing the Intel Platform Update (IPU)
+process or other similar mechanisms.
+
+End of Servicing Updates (ESU): ESU is the date at which Intel will no
+longer provide Servicing, such as through IPU or other similar update
+processes. ESU dates will typically be aligned to end of quarter.
 
 If the processor is vulnerable then the following information is appended to
 the above information:
diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h
index 4910bf230d7b..62208ec04ca4 100644
--- a/arch/x86/boot/compressed/misc.h
+++ b/arch/x86/boot/compressed/misc.h
@@ -132,7 +132,17 @@ void snp_set_page_private(unsigned long paddr);
 void snp_set_page_shared(unsigned long paddr);
 void sev_prep_identity_maps(unsigned long top_level_pgt);
 #else
-static inline void sev_enable(struct boot_params *bp) { }
+static inline void sev_enable(struct boot_params *bp)
+{
+	/*
+	 * bp->cc_blob_address should only be set by boot/compressed kernel.
+	 * Initialize it to 0 unconditionally (thus here in this stub too) to
+	 * ensure that uninitialized values from buggy bootloaders aren't
+	 * propagated.
+	 */
+	if (bp)
+		bp->cc_blob_address = 0;
+}
 static inline void sev_es_shutdown_ghcb(void) { }
 static inline bool sev_es_check_ghcb_fault(unsigned long address)
 {
diff --git a/arch/x86/boot/compressed/sev.c b/arch/x86/boot/compressed/sev.c
index 52f989f6acc2..c93930d5ccbd 100644
--- a/arch/x86/boot/compressed/sev.c
+++ b/arch/x86/boot/compressed/sev.c
@@ -276,6 +276,14 @@ void sev_enable(struct boot_params *bp)
 	struct msr m;
 	bool snp;
 
+	/*
+	 * bp->cc_blob_address should only be set by boot/compressed kernel.
+	 * Initialize it to 0 to ensure that uninitialized values from
+	 * buggy bootloaders aren't propagated.
+	 */
+	if (bp)
+		bp->cc_blob_address = 0;
+
 	/*
 	 * Setup/preliminary detection of SNP. This will be sanity-checked
 	 * against CPUID/MSR values later.
diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
index 682338e7e2a3..4dd19819053a 100644
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -311,7 +311,7 @@ SYM_CODE_START(entry_INT80_compat)
 	 * Interrupts are off on entry.
 	 */
 	ASM_CLAC			/* Do this early to minimize exposure */
-	SWAPGS
+	ALTERNATIVE "swapgs", "", X86_FEATURE_XENPV
 
 	/*
 	 * User tracing code (ptrace or signal handlers) might assume that
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 235dc85c91c3..ef4775c6db01 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -457,7 +457,8 @@
 #define X86_BUG_ITLB_MULTIHIT		X86_BUG(23) /* CPU may incur MCE during certain page attribute changes */
 #define X86_BUG_SRBDS			X86_BUG(24) /* CPU may leak RNG bits if not mitigated */
 #define X86_BUG_MMIO_STALE_DATA		X86_BUG(25) /* CPU is affected by Processor MMIO Stale Data vulnerabilities */
-#define X86_BUG_RETBLEED		X86_BUG(26) /* CPU is affected by RETBleed */
-#define X86_BUG_EIBRS_PBRSB		X86_BUG(27) /* EIBRS is vulnerable to Post Barrier RSB Predictions */
+#define X86_BUG_MMIO_UNKNOWN		X86_BUG(26) /* CPU is too old and its MMIO Stale Data status is unknown */
+#define X86_BUG_RETBLEED		X86_BUG(27) /* CPU is affected by RETBleed */
+#define X86_BUG_EIBRS_PBRSB		X86_BUG(28) /* EIBRS is vulnerable to Post Barrier RSB Predictions */
 
 #endif /* _ASM_X86_CPUFEATURES_H */
diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h
index def6ca121111..aeb38023a703 100644
--- a/arch/x86/include/asm/intel-family.h
+++ b/arch/x86/include/asm/intel-family.h
@@ -27,6 +27,7 @@
  *		_X	- regular server parts
  *		_D	- micro server parts
  *		_N,_P	- other mobile parts
+ *		_S	- other client parts
  *
  *		Historical OPTDIFFs:
  *
@@ -112,6 +113,7 @@
 
 #define INTEL_FAM6_RAPTORLAKE		0xB7
 #define INTEL_FAM6_RAPTORLAKE_P		0xBA
+#define INTEL_FAM6_RAPTORLAKE_S		0xBF
 
 /* "Small Core" Processors (Atom) */
 
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index e64fd20778b6..c936ce9f0c47 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -35,33 +35,56 @@
 #define RSB_CLEAR_LOOPS		32	/* To forcibly overwrite all entries */
 
 /*
+ * Common helper for __FILL_RETURN_BUFFER and __FILL_ONE_RETURN.
+ */
+#define __FILL_RETURN_SLOT			\
+	ANNOTATE_INTRA_FUNCTION_CALL;		\
+	call	772f;				\
+	int3;					\
+772:
+
+/*
+ * Stuff the entire RSB.
+ *
  * Google experimented with loop-unrolling and this turned out to be
  * the optimal version - two calls, each with their own speculation
  * trap should their return address end up getting used, in a loop.
  */
-#define __FILL_RETURN_BUFFER(reg, nr, sp)	\
-	mov	$(nr/2), reg;			\
-771:						\
-	ANNOTATE_INTRA_FUNCTION_CALL;		\
-	call	772f;				\
-773:	/* speculation trap */			\
-	UNWIND_HINT_EMPTY;			\
-	pause;					\
-	lfence;					\
-	jmp	773b;				\
-772:						\
-	ANNOTATE_INTRA_FUNCTION_CALL;		\
-	call	774f;				\
-775:	/* speculation trap */			\
-	UNWIND_HINT_EMPTY;			\
-	pause;					\
-	lfence;					\
-	jmp	775b;				\
-774:						\
-	add	$(BITS_PER_LONG/8) * 2, sp;	\
-	dec	reg;				\
-	jnz	771b;				\
-	/* barrier for jnz misprediction */	\
+#ifdef CONFIG_X86_64
+#define __FILL_RETURN_BUFFER(reg, nr)			\
+	mov	$(nr/2), reg;				\
+771:							\
+	__FILL_RETURN_SLOT				\
+	__FILL_RETURN_SLOT				\
+	add	$(BITS_PER_LONG/8) * 2, %_ASM_SP;	\
+	dec	reg;					\
+	jnz	771b;					\
+	/* barrier for jnz misprediction */		\
+	lfence;
+#else
+/*
+ * i386 doesn't unconditionally have LFENCE, as such it can't
+ * do a loop.
+ */
+#define __FILL_RETURN_BUFFER(reg, nr)			\
+	.rept nr;					\
+	__FILL_RETURN_SLOT;				\
+	.endr;						\
+	add	$(BITS_PER_LONG/8) * nr, %_ASM_SP;
+#endif
+
+/*
+ * Stuff a single RSB slot.
+ *
+ * To mitigate Post-Barrier RSB speculation, one CALL instruction must be
+ * forced to retire before letting a RET instruction execute.
+ *
+ * On PBRSB-vulnerable CPUs, it is not safe for a RET to be executed
+ * before this point.
+ */
+#define __FILL_ONE_RETURN				\
+	__FILL_RETURN_SLOT				\
+	add	$(BITS_PER_LONG/8), %_ASM_SP;		\
 	lfence;
 
 #ifdef __ASSEMBLY__
@@ -132,28 +155,15 @@
 #endif
 .endm
 
-.macro ISSUE_UNBALANCED_RET_GUARD
-	ANNOTATE_INTRA_FUNCTION_CALL
-	call .Lunbalanced_ret_guard_\@
-	int3
-.Lunbalanced_ret_guard_\@:
-	add $(BITS_PER_LONG/8), %_ASM_SP
-	lfence
-.endm
-
  /*
   * A simpler FILL_RETURN_BUFFER macro. Don't make people use the CPP
   * monstrosity above, manually.
   */
-.macro FILL_RETURN_BUFFER reg:req nr:req ftr:req ftr2
-.ifb \ftr2
-	ALTERNATIVE "jmp .Lskip_rsb_\@", "", \ftr
-.else
-	ALTERNATIVE_2 "jmp .Lskip_rsb_\@", "", \ftr, "jmp .Lunbalanced_\@", \ftr2
-.endif
-	__FILL_RETURN_BUFFER(\reg,\nr,%_ASM_SP)
-.Lunbalanced_\@:
-	ISSUE_UNBALANCED_RET_GUARD
+.macro FILL_RETURN_BUFFER reg:req nr:req ftr:req ftr2=ALT_NOT(X86_FEATURE_ALWAYS)
+	ALTERNATIVE_2 "jmp .Lskip_rsb_\@", \
+		__stringify(__FILL_RETURN_BUFFER(\reg,\nr)), \ftr, \
+		__stringify(__FILL_ONE_RETURN), \ftr2
+
 .Lskip_rsb_\@:
 .endm
 
diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h
index 4a23e52fe0ee..ebc271bb6d8e 100644
--- a/arch/x86/include/asm/sev.h
+++ b/arch/x86/include/asm/sev.h
@@ -195,7 +195,7 @@ void snp_set_memory_shared(unsigned long vaddr, unsigned int npages);
 void snp_set_memory_private(unsigned long vaddr, unsigned int npages);
 void snp_set_wakeup_secondary_cpu(void);
 bool snp_init(struct boot_params *bp);
-void snp_abort(void);
+void __init __noreturn snp_abort(void);
 int snp_issue_guest_request(u64 exit_code, struct snp_req_data *input, unsigned long *fw_err);
 #else
 static inline void sev_es_ist_enter(struct pt_regs *regs) { }
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 510d85261132..da7c361f47e0 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -433,7 +433,8 @@ static void __init mmio_select_mitigation(void)
 	u64 ia32_cap;
 
 	if (!boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA) ||
-	    cpu_mitigations_off()) {
+	     boot_cpu_has_bug(X86_BUG_MMIO_UNKNOWN) ||
+	     cpu_mitigations_off()) {
 		mmio_mitigation = MMIO_MITIGATION_OFF;
 		return;
 	}
@@ -538,6 +539,8 @@ static void __init md_clear_update_mitigation(void)
 		pr_info("TAA: %s\n", taa_strings[taa_mitigation]);
 	if (boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA))
 		pr_info("MMIO Stale Data: %s\n", mmio_strings[mmio_mitigation]);
+	else if (boot_cpu_has_bug(X86_BUG_MMIO_UNKNOWN))
+		pr_info("MMIO Stale Data: Unknown: No mitigations\n");
 }
 
 static void __init md_clear_select_mitigation(void)
@@ -2275,6 +2278,9 @@ static ssize_t tsx_async_abort_show_state(char *buf)
 
 static ssize_t mmio_stale_data_show_state(char *buf)
 {
+	if (boot_cpu_has_bug(X86_BUG_MMIO_UNKNOWN))
+		return sysfs_emit(buf, "Unknown: No mitigations\n");
+
 	if (mmio_mitigation == MMIO_MITIGATION_OFF)
 		return sysfs_emit(buf, "%s\n", mmio_strings[mmio_mitigation]);
 
@@ -2421,6 +2427,7 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr
 		return srbds_show_state(buf);
 
 	case X86_BUG_MMIO_STALE_DATA:
+	case X86_BUG_MMIO_UNKNOWN:
 		return mmio_stale_data_show_state(buf);
 
 	case X86_BUG_RETBLEED:
@@ -2480,7 +2487,10 @@ ssize_t cpu_show_srbds(struct device *dev, struct device_attribute *attr, char *
 
 ssize_t cpu_show_mmio_stale_data(struct device *dev, struct device_attribute *attr, char *buf)
 {
-	return cpu_show_common(dev, attr, buf, X86_BUG_MMIO_STALE_DATA);
+	if (boot_cpu_has_bug(X86_BUG_MMIO_UNKNOWN))
+		return cpu_show_common(dev, attr, buf, X86_BUG_MMIO_UNKNOWN);
+	else
+		return cpu_show_common(dev, attr, buf, X86_BUG_MMIO_STALE_DATA);
 }
 
 ssize_t cpu_show_retbleed(struct device *dev, struct device_attribute *attr, char *buf)
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 64a73f415f03..3e508f239098 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1135,7 +1135,8 @@ static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c)
 #define NO_SWAPGS		BIT(6)
 #define NO_ITLB_MULTIHIT	BIT(7)
 #define NO_SPECTRE_V2		BIT(8)
-#define NO_EIBRS_PBRSB		BIT(9)
+#define NO_MMIO			BIT(9)
+#define NO_EIBRS_PBRSB		BIT(10)
 
 #define VULNWL(vendor, family, model, whitelist)	\
 	X86_MATCH_VENDOR_FAM_MODEL(vendor, family, model, whitelist)
@@ -1158,6 +1159,11 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = {
 	VULNWL(VORTEX,	6, X86_MODEL_ANY,	NO_SPECULATION),
 
 	/* Intel Family 6 */
+	VULNWL_INTEL(TIGERLAKE,			NO_MMIO),
+	VULNWL_INTEL(TIGERLAKE_L,		NO_MMIO),
+	VULNWL_INTEL(ALDERLAKE,			NO_MMIO),
+	VULNWL_INTEL(ALDERLAKE_L,		NO_MMIO),
+
 	VULNWL_INTEL(ATOM_SALTWELL,		NO_SPECULATION | NO_ITLB_MULTIHIT),
 	VULNWL_INTEL(ATOM_SALTWELL_TABLET,	NO_SPECULATION | NO_ITLB_MULTIHIT),
 	VULNWL_INTEL(ATOM_SALTWELL_MID,		NO_SPECULATION | NO_ITLB_MULTIHIT),
@@ -1176,9 +1182,9 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = {
 	VULNWL_INTEL(ATOM_AIRMONT_MID,		NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT),
 	VULNWL_INTEL(ATOM_AIRMONT_NP,		NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT),
 
-	VULNWL_INTEL(ATOM_GOLDMONT,		NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT),
-	VULNWL_INTEL(ATOM_GOLDMONT_D,		NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT),
-	VULNWL_INTEL(ATOM_GOLDMONT_PLUS,	NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_EIBRS_PBRSB),
+	VULNWL_INTEL(ATOM_GOLDMONT,		NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO),
+	VULNWL_INTEL(ATOM_GOLDMONT_D,		NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO),
+	VULNWL_INTEL(ATOM_GOLDMONT_PLUS,	NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_EIBRS_PBRSB),
 
 	/*
 	 * Technically, swapgs isn't serializing on AMD (despite it previously
@@ -1193,18 +1199,18 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = {
 	VULNWL_INTEL(ATOM_TREMONT_D,		NO_ITLB_MULTIHIT | NO_EIBRS_PBRSB),
 
 	/* AMD Family 0xf - 0x12 */
-	VULNWL_AMD(0x0f,	NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT),
-	VULNWL_AMD(0x10,	NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT),
-	VULNWL_AMD(0x11,	NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT),
-	VULNWL_AMD(0x12,	NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT),
+	VULNWL_AMD(0x0f,	NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO),
+	VULNWL_AMD(0x10,	NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO),
+	VULNWL_AMD(0x11,	NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO),
+	VULNWL_AMD(0x12,	NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO),
 
 	/* FAMILY_ANY must be last, otherwise 0x0f - 0x12 matches won't work */
-	VULNWL_AMD(X86_FAMILY_ANY,	NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT),
-	VULNWL_HYGON(X86_FAMILY_ANY,	NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT),
+	VULNWL_AMD(X86_FAMILY_ANY,	NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO),
+	VULNWL_HYGON(X86_FAMILY_ANY,	NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO),
 
 	/* Zhaoxin Family 7 */
-	VULNWL(CENTAUR,	7, X86_MODEL_ANY,	NO_SPECTRE_V2 | NO_SWAPGS),
-	VULNWL(ZHAOXIN,	7, X86_MODEL_ANY,	NO_SPECTRE_V2 | NO_SWAPGS),
+	VULNWL(CENTAUR,	7, X86_MODEL_ANY,	NO_SPECTRE_V2 | NO_SWAPGS | NO_MMIO),
+	VULNWL(ZHAOXIN,	7, X86_MODEL_ANY,	NO_SPECTRE_V2 | NO_SWAPGS | NO_MMIO),
 	{}
 };
 
@@ -1358,10 +1364,16 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
 	 * Affected CPU list is generally enough to enumerate the vulnerability,
 	 * but for virtualization case check for ARCH_CAP MSR bits also, VMM may
 	 * not want the guest to enumerate the bug.
+	 *
+	 * Set X86_BUG_MMIO_UNKNOWN for CPUs that are neither in the blacklist,
+	 * nor in the whitelist and also don't enumerate MSR ARCH_CAP MMIO bits.
 	 */
-	if (cpu_matches(cpu_vuln_blacklist, MMIO) &&
-	    !arch_cap_mmio_immune(ia32_cap))
-		setup_force_cpu_bug(X86_BUG_MMIO_STALE_DATA);
+	if (!arch_cap_mmio_immune(ia32_cap)) {
+		if (cpu_matches(cpu_vuln_blacklist, MMIO))
+			setup_force_cpu_bug(X86_BUG_MMIO_STALE_DATA);
+		else if (!cpu_matches(cpu_vuln_whitelist, NO_MMIO))
+			setup_force_cpu_bug(X86_BUG_MMIO_UNKNOWN);
+	}
 
 	if (!cpu_has(c, X86_FEATURE_BTC_NO)) {
 		if (cpu_matches(cpu_vuln_blacklist, RETBLEED) || (ia32_cap & ARCH_CAP_RSBA))
diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c
index 63dc626627a0..a428c62330d3 100644
--- a/arch/x86/kernel/sev.c
+++ b/arch/x86/kernel/sev.c
@@ -701,7 +701,13 @@ static void __init early_set_pages_state(unsigned long paddr, unsigned int npage
 void __init early_snp_set_memory_private(unsigned long vaddr, unsigned long paddr,
 					 unsigned int npages)
 {
-	if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
+	/*
+	 * This can be invoked in early boot while running identity mapped, so
+	 * use an open coded check for SNP instead of using cc_platform_has().
+	 * This eliminates worries about jump tables or checking boot_cpu_data
+	 * in the cc_platform_has() function.
+	 */
+	if (!(sev_status & MSR_AMD64_SEV_SNP_ENABLED))
 		return;
 
 	 /*
@@ -717,7 +723,13 @@ void __init early_snp_set_memory_private(unsigned long vaddr, unsigned long padd
 void __init early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr,
 					unsigned int npages)
 {
-	if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
+	/*
+	 * This can be invoked in early boot while running identity mapped, so
+	 * use an open coded check for SNP instead of using cc_platform_has().
+	 * This eliminates worries about jump tables or checking boot_cpu_data
+	 * in the cc_platform_has() function.
+	 */
+	if (!(sev_status & MSR_AMD64_SEV_SNP_ENABLED))
 		return;
 
 	/* Invalidate the memory pages before they are marked shared in the RMP table. */
@@ -2100,7 +2112,7 @@ bool __init snp_init(struct boot_params *bp)
 	return true;
 }
 
-void __init snp_abort(void)
+void __init __noreturn snp_abort(void)
 {
 	sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SNP_UNSUPPORTED);
 }
diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c
index 38185aedf7d1..0ea57da92940 100644
--- a/arch/x86/kernel/unwind_orc.c
+++ b/arch/x86/kernel/unwind_orc.c
@@ -93,22 +93,27 @@ static struct orc_entry *orc_find(unsigned long ip);
 static struct orc_entry *orc_ftrace_find(unsigned long ip)
 {
 	struct ftrace_ops *ops;
-	unsigned long caller;
+	unsigned long tramp_addr, offset;
 
 	ops = ftrace_ops_trampoline(ip);
 	if (!ops)
 		return NULL;
 
+	/* Set tramp_addr to the start of the code copied by the trampoline */
 	if (ops->flags & FTRACE_OPS_FL_SAVE_REGS)
-		caller = (unsigned long)ftrace_regs_call;
+		tramp_addr = (unsigned long)ftrace_regs_caller;
 	else
-		caller = (unsigned long)ftrace_call;
+		tramp_addr = (unsigned long)ftrace_caller;
+
+	/* Now place tramp_addr to the location within the trampoline ip is at */
+	offset = ip - ops->trampoline;
+	tramp_addr += offset;
 
 	/* Prevent unlikely recursion */
-	if (ip == caller)
+	if (ip == tramp_addr)
 		return NULL;
 
-	return orc_find(caller);
+	return orc_find(tramp_addr);
 }
 #else
 static struct orc_entry *orc_ftrace_find(unsigned long ip)
diff --git a/arch/x86/mm/pat/memtype.c b/arch/x86/mm/pat/memtype.c
index d5ef64ddd35e..66a209f7eb86 100644
--- a/arch/x86/mm/pat/memtype.c
+++ b/arch/x86/mm/pat/memtype.c
@@ -62,6 +62,7 @@
 
 static bool __read_mostly pat_bp_initialized;
 static bool __read_mostly pat_disabled = !IS_ENABLED(CONFIG_X86_PAT);
+static bool __initdata pat_force_disabled = !IS_ENABLED(CONFIG_X86_PAT);
 static bool __read_mostly pat_bp_enabled;
 static bool __read_mostly pat_cm_initialized;
 
@@ -86,6 +87,7 @@ void pat_disable(const char *msg_reason)
 static int __init nopat(char *str)
 {
 	pat_disable("PAT support disabled via boot option.");
+	pat_force_disabled = true;
 	return 0;
 }
 early_param("nopat", nopat);
@@ -272,7 +274,7 @@ static void pat_ap_init(u64 pat)
 	wrmsrl(MSR_IA32_CR_PAT, pat);
 }
 
-void init_cache_modes(void)
+void __init init_cache_modes(void)
 {
 	u64 pat = 0;
 
@@ -313,6 +315,12 @@ void init_cache_modes(void)
 		 */
 		pat = PAT(0, WB) | PAT(1, WT) | PAT(2, UC_MINUS) | PAT(3, UC) |
 		      PAT(4, WB) | PAT(5, WT) | PAT(6, UC_MINUS) | PAT(7, UC);
+	} else if (!pat_force_disabled && cpu_feature_enabled(X86_FEATURE_HYPERVISOR)) {
+		/*
+		 * Clearly PAT is enabled underneath. Allow pat_enabled() to
+		 * reflect this.
+		 */
+		pat_bp_enabled = true;
 	}
 
 	__init_cache_modes(pat);
diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index 0cec74da7ffe..ad51689dfb41 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -162,32 +162,34 @@ static bool __dead_end_function(struct objtool_file *file, struct symbol *func,
 
 	/*
 	 * Unfortunately these have to be hard coded because the noreturn
-	 * attribute isn't provided in ELF data.
+	 * attribute isn't provided in ELF data. Keep 'em sorted.
 	 */
 	static const char * const global_noreturns[] = {
+		"__invalid_creds",
+		"__module_put_and_kthread_exit",
+		"__reiserfs_panic",
 		"__stack_chk_fail",
-		"panic",
+		"__ubsan_handle_builtin_unreachable",
+		"cpu_bringup_and_idle",
+		"cpu_startup_entry",
 		"do_exit",
+		"do_group_exit",
 		"do_task_dead",
-		"kthread_exit",
-		"make_task_dead",
-		"__module_put_and_kthread_exit",
+		"ex_handler_msr_mce",
+		"fortify_panic",
 		"kthread_complete_and_exit",
-		"__reiserfs_panic",
+		"kthread_exit",
+		"kunit_try_catch_throw",
 		"lbug_with_loc",
-		"fortify_panic",
-		"usercopy_abort",
 		"machine_real_restart",
+		"make_task_dead",
+		"panic",
 		"rewind_stack_and_make_dead",
-		"kunit_try_catch_throw",
-		"xen_start_kernel",
-		"cpu_bringup_and_idle",
-		"do_group_exit",
+		"sev_es_terminate",
+		"snp_abort",
 		"stop_this_cpu",
-		"__invalid_creds",
-		"cpu_startup_entry",
-		"__ubsan_handle_builtin_unreachable",
-		"ex_handler_msr_mce",
+		"usercopy_abort",
+		"xen_start_kernel",
 	};
 
 	if (!func)

^ permalink raw reply	[flat|nested] 539+ messages in thread

* Re: [GIT PULL] x86 fixes
  2022-08-06 19:29 Ingo Molnar
@ 2022-08-07  0:50 ` pr-tracker-bot
  0 siblings, 0 replies; 539+ messages in thread
From: pr-tracker-bot @ 2022-08-07  0:50 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Linus Torvalds, linux-kernel, Thomas Gleixner, Borislav Petkov,
	Peter Zijlstra, Dave Hansen, x86, H. Peter Anvin

The pull request you sent on Sat, 6 Aug 2022 21:29:39 +0200:

> git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-2022-08-06

has been merged into torvalds/linux.git:
https://git.kernel.org/torvalds/c/1612c382ffbdf1f673caec76502b1c00e6d35363

Thank you!

-- 
Deet-doot-dot, I am a bot.
https://korg.docs.kernel.org/prtracker.html

^ permalink raw reply	[flat|nested] 539+ messages in thread

* [GIT PULL] x86 fixes
@ 2022-08-06 19:29 Ingo Molnar
  2022-08-07  0:50 ` pr-tracker-bot
  0 siblings, 1 reply; 539+ messages in thread
From: Ingo Molnar @ 2022-08-06 19:29 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Thomas Gleixner, Borislav Petkov, Peter Zijlstra,
	Dave Hansen, x86, H. Peter Anvin

Linus,

Please pull the latest x86/urgent git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-2022-08-06

   # HEAD: de979c83574abf6e78f3fa65b716515c91b2613d x86/entry: Build thunk_$(BITS) only if CONFIG_PREEMPTION=y

Misc fixes:

 - an old(er) binutils build fix,
 - a new-GCC build fix,
 - and a kexec boot environment fix.

NOTE, there's a new conflict in arch/x86/um/Makefile, due to these two 
commits interacting:

  upstream:      # 5b301409e8bc UML: add support for KASAN under x86_64
                 # adds two new targets to the single subarch-y line

  x86/urgent:    # de979c83574a x86/entry: Build thunk_$(BITS) only if CONFIG_PREEMPTION=y
                 # splits the ../entry/thunk_64.o target from the subarch-y line

it's a context conflict only, my resolution was:

  subarch-y = ../lib/csum-partial_64.o ../lib/memcpy_64.o ../lib/memmove_64.o ../lib/memset_64.o
  subarch-$(CONFIG_PREEMPTION) += ../entry/thunk_64.o

Thanks,

	Ingo

------------------>
Andrea Righi (1):
      x86/entry: Build thunk_$(BITS) only if CONFIG_PREEMPTION=y

Chenyi Qiang (1):
      x86/bus_lock: Don't assume the init value of DEBUGCTLMSR.BUS_LOCK_DETECT to be zero

Siddh Raman Pant (1):
      x86/numa: Use cpumask_available instead of hardcoded NULL check


 arch/x86/entry/Makefile     |  3 ++-
 arch/x86/entry/thunk_32.S   |  2 --
 arch/x86/entry/thunk_64.S   |  4 ----
 arch/x86/kernel/cpu/intel.c | 27 ++++++++++++++-------------
 arch/x86/mm/numa.c          |  4 ++--
 arch/x86/um/Makefile        |  3 ++-
 6 files changed, 20 insertions(+), 23 deletions(-)

^ permalink raw reply	[flat|nested] 539+ messages in thread

* Re: [GIT PULL] x86 fixes
  2021-03-28 10:44 Ingo Molnar
@ 2021-03-28 19:22 ` pr-tracker-bot
  0 siblings, 0 replies; 539+ messages in thread
From: pr-tracker-bot @ 2021-03-28 19:22 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Linus Torvalds, linux-kernel, Thomas Gleixner, Borislav Petkov,
	Peter Zijlstra, Andrew Morton

The pull request you sent on Sun, 28 Mar 2021 12:44:42 +0200:

> git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-2021-03-28

has been merged into torvalds/linux.git:
https://git.kernel.org/torvalds/c/36a14638f7c06546717cc1316fcfee6da42b98cc

Thank you!

-- 
Deet-doot-dot, I am a bot.
https://korg.docs.kernel.org/prtracker.html

^ permalink raw reply	[flat|nested] 539+ messages in thread

* [GIT PULL] x86 fixes
@ 2021-03-28 10:44 Ingo Molnar
  2021-03-28 19:22 ` pr-tracker-bot
  0 siblings, 1 reply; 539+ messages in thread
From: Ingo Molnar @ 2021-03-28 10:44 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Thomas Gleixner, Borislav Petkov, Peter Zijlstra,
	Andrew Morton

Linus,

Please pull the latest x86/urgent git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-2021-03-28

   # HEAD: 9fcb51c14da2953de585c5c6e50697b8a6e91a7b x86/build: Turn off -fcf-protection for realmode targets

Two fixes:

 - Fix build failure on Ubuntu with new GCC packages that turn on -fcf-protection

 - Fix SME memory encryption PTE encoding bug - AFAICT the code worked on
   4K page sizes (level 1) but had the wrong shift at higher page level orders
   (level 2 and higher).

Signed-off-by: Ingo Molnar <mingo@kernel.org>
 Thanks,

	Ingo

------------------>
Arnd Bergmann (1):
      x86/build: Turn off -fcf-protection for realmode targets

Isaku Yamahata (1):
      x86/mem_encrypt: Correct physical address calculation in __set_clr_pte_enc()


 arch/x86/Makefile         | 2 +-
 arch/x86/mm/mem_encrypt.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 2d6d5a28c3bf..9a85eae37b17 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -27,7 +27,7 @@ endif
 REALMODE_CFLAGS	:= -m16 -g -Os -DDISABLE_BRANCH_PROFILING \
 		   -Wall -Wstrict-prototypes -march=i386 -mregparm=3 \
 		   -fno-strict-aliasing -fomit-frame-pointer -fno-pic \
-		   -mno-mmx -mno-sse
+		   -mno-mmx -mno-sse $(call cc-option,-fcf-protection=none)
 
 REALMODE_CFLAGS += -ffreestanding
 REALMODE_CFLAGS += -fno-stack-protector
diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c
index 4b01f7dbaf30..ae78cef79980 100644
--- a/arch/x86/mm/mem_encrypt.c
+++ b/arch/x86/mm/mem_encrypt.c
@@ -262,7 +262,7 @@ static void __init __set_clr_pte_enc(pte_t *kpte, int level, bool enc)
 	if (pgprot_val(old_prot) == pgprot_val(new_prot))
 		return;
 
-	pa = pfn << page_level_shift(level);
+	pa = pfn << PAGE_SHIFT;
 	size = page_level_size(level);
 
 	/*

^ permalink raw reply	[flat|nested] 539+ messages in thread

* Re: [GIT PULL] x86 fixes
  2020-10-11 18:00 ` Linus Torvalds
@ 2020-10-11 20:00   ` Thomas Gleixner
  0 siblings, 0 replies; 539+ messages in thread
From: Thomas Gleixner @ 2020-10-11 20:00 UTC (permalink / raw)
  To: Linus Torvalds, Ingo Molnar
  Cc: Linux Kernel Mailing List, Borislav Petkov, Peter Zijlstra,
	Andrew Morton

Linus,

On Sun, Oct 11 2020 at 11:00, Linus Torvalds wrote:
> On Sun, Oct 11, 2020 at 1:09 AM Ingo Molnar <mingo@kernel.org> wrote:
>>
>>  - Fix a (hopefully final) IRQ state tracking bug vs. MCE handling
>
> Why is the nmi_enter/nmi_exit thing not a problem on non-x86 architectures?
>
> Put another way: x86 does extra work to track IRQ state across NMI.
> What makes x86 special? It _feels_ to me like everybody should do
> that? No?
>
> Please tell me what I've missed..

Not a lot. The nmi related tracking plus the entry/exit inconsistencies
which we identified and fixed over the past couple of releases in x86
apply pretty much to all of arch/* in one way or the other and of course
in different flavours.

This is worked on by architecture people and will hopefully be solved at
some point by moving all architectures to the generic entry code.

Thanks,

        tglx

^ permalink raw reply	[flat|nested] 539+ messages in thread

* Re: [GIT PULL] x86 fixes
  2020-10-11  8:08 Ingo Molnar
  2020-10-11 18:00 ` Linus Torvalds
@ 2020-10-11 18:23 ` pr-tracker-bot
  1 sibling, 0 replies; 539+ messages in thread
From: pr-tracker-bot @ 2020-10-11 18:23 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Linus Torvalds, linux-kernel, Thomas Gleixner, Borislav Petkov,
	Peter Zijlstra, Andrew Morton

The pull request you sent on Sun, 11 Oct 2020 10:08:59 +0200:

> git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-2020-10-11

has been merged into torvalds/linux.git:
https://git.kernel.org/torvalds/c/c120ec12e216225f7536cc1b924fe428ff64b5bd

Thank you!

-- 
Deet-doot-dot, I am a bot.
https://korg.docs.kernel.org/prtracker.html

^ permalink raw reply	[flat|nested] 539+ messages in thread

* Re: [GIT PULL] x86 fixes
  2020-10-11  8:08 Ingo Molnar
@ 2020-10-11 18:00 ` Linus Torvalds
  2020-10-11 20:00   ` Thomas Gleixner
  2020-10-11 18:23 ` pr-tracker-bot
  1 sibling, 1 reply; 539+ messages in thread
From: Linus Torvalds @ 2020-10-11 18:00 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Linux Kernel Mailing List, Thomas Gleixner, Borislav Petkov,
	Peter Zijlstra, Andrew Morton

On Sun, Oct 11, 2020 at 1:09 AM Ingo Molnar <mingo@kernel.org> wrote:
>
>  - Fix a (hopefully final) IRQ state tracking bug vs. MCE handling

Why is the nmi_enter/nmi_exit thing not a problem on non-x86 architectures?

Put another way: x86 does extra work to track IRQ state across NMI.
What makes x86 special? It _feels_ to me like everybody should do
that? No?

Please tell me what I've missed..

                   Linus

^ permalink raw reply	[flat|nested] 539+ messages in thread

* [GIT PULL] x86 fixes
@ 2020-10-11  8:08 Ingo Molnar
  2020-10-11 18:00 ` Linus Torvalds
  2020-10-11 18:23 ` pr-tracker-bot
  0 siblings, 2 replies; 539+ messages in thread
From: Ingo Molnar @ 2020-10-11  8:08 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Thomas Gleixner, Borislav Petkov, Peter Zijlstra,
	Andrew Morton

Linus,

Please pull the latest x86/urgent git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-2020-10-11

   # HEAD: 0c7689830e907668288a1a1da84dca66dbdb4728 Documentation/x86: Fix incorrect references to zero-page.txt

Two fixes:

 - Fix a (hopefully final) IRQ state tracking bug vs. MCE handling
 - Fix a documentation link

 Thanks,

	Ingo

------------------>
Heinrich Schuchardt (1):
      Documentation/x86: Fix incorrect references to zero-page.txt

Thomas Gleixner (1):
      x86/mce: Use idtentry_nmi_enter/exit()


 Documentation/x86/boot.rst     | 6 +++---
 arch/x86/kernel/cpu/mce/core.c | 6 ++++--
 2 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/Documentation/x86/boot.rst b/Documentation/x86/boot.rst
index 7fafc7ac00d7..abb9fc164657 100644
--- a/Documentation/x86/boot.rst
+++ b/Documentation/x86/boot.rst
@@ -1342,8 +1342,8 @@ follow::
 
 In addition to read/modify/write the setup header of the struct
 boot_params as that of 16-bit boot protocol, the boot loader should
-also fill the additional fields of the struct boot_params as that
-described in zero-page.txt.
+also fill the additional fields of the struct boot_params as
+described in chapter :doc:`zero-page`.
 
 After setting up the struct boot_params, the boot loader can load the
 32/64-bit kernel in the same way as that of 16-bit boot protocol.
@@ -1379,7 +1379,7 @@ can be calculated as follows::
 In addition to read/modify/write the setup header of the struct
 boot_params as that of 16-bit boot protocol, the boot loader should
 also fill the additional fields of the struct boot_params as described
-in zero-page.txt.
+in chapter :doc:`zero-page`.
 
 After setting up the struct boot_params, the boot loader can load
 64-bit kernel in the same way as that of 16-bit boot protocol, but
diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index f43a78bde670..fc4f8c04bdb5 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -1904,6 +1904,8 @@ void (*machine_check_vector)(struct pt_regs *) = unexpected_machine_check;
 
 static __always_inline void exc_machine_check_kernel(struct pt_regs *regs)
 {
+	bool irq_state;
+
 	WARN_ON_ONCE(user_mode(regs));
 
 	/*
@@ -1914,7 +1916,7 @@ static __always_inline void exc_machine_check_kernel(struct pt_regs *regs)
 	    mce_check_crashing_cpu())
 		return;
 
-	nmi_enter();
+	irq_state = idtentry_enter_nmi(regs);
 	/*
 	 * The call targets are marked noinstr, but objtool can't figure
 	 * that out because it's an indirect call. Annotate it.
@@ -1925,7 +1927,7 @@ static __always_inline void exc_machine_check_kernel(struct pt_regs *regs)
 	if (regs->flags & X86_EFLAGS_IF)
 		trace_hardirqs_on_prepare();
 	instrumentation_end();
-	nmi_exit();
+	idtentry_exit_nmi(regs, irq_state);
 }
 
 static __always_inline void exc_machine_check_user(struct pt_regs *regs)

^ permalink raw reply	[flat|nested] 539+ messages in thread

* Re: [GIT PULL] x86 fixes
  2020-09-06  8:15 Ingo Molnar
@ 2020-09-06 19:14 ` pr-tracker-bot
  0 siblings, 0 replies; 539+ messages in thread
From: pr-tracker-bot @ 2020-09-06 19:14 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Linus Torvalds, linux-kernel, Thomas Gleixner, Borislav Petkov,
	Peter Zijlstra, Andrew Morton

The pull request you sent on Sun, 6 Sep 2020 10:15:15 +0200:

> git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-2020-09-06

has been merged into torvalds/linux.git:
https://git.kernel.org/torvalds/c/015b3155c46a089f623c8a2e794ffad84143565d

Thank you!

-- 
Deet-doot-dot, I am a bot.
https://korg.docs.kernel.org/prtracker.html

^ permalink raw reply	[flat|nested] 539+ messages in thread

* [GIT PULL] x86 fixes
@ 2020-09-06  8:15 Ingo Molnar
  2020-09-06 19:14 ` pr-tracker-bot
  0 siblings, 1 reply; 539+ messages in thread
From: Ingo Molnar @ 2020-09-06  8:15 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Thomas Gleixner, Borislav Petkov, Peter Zijlstra,
	Andrew Morton

Linus,

Please pull the latest x86/urgent git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-2020-09-06

   # HEAD: 4facb95b7adaf77e2da73aafb9ba60996fe42a12 x86/entry: Unbreak 32bit fast syscall

Misc fixes:

 - Fix more generic entry code ABI fallout
 - Fix debug register handling bugs
 - Fix vmalloc mappings on 32-bit kernels
 - Fix kprobes instrumentation output on 32-bit kernels
 - Fix over-eager WARN_ON_ONCE() on !SMAP hardware
 - Fix NUMA debugging
 - Fix Clang related crash on !RETPOLINE kernels

The most complex fixes are only a few days old and some haven't seen 
-next yet, but I didn't think we should delay them.

  out-of-topic modifications in x86-urgent-2020-09-06:
  ------------------------------------------------------
  include/linux/entry-common.h       # 4facb95b7ada: x86/entry: Unbreak 32bit fas
  kernel/entry/common.c              # 4facb95b7ada: x86/entry: Unbreak 32bit fas

 Thanks,

	Ingo

------------------>
Andy Lutomirski (1):
      x86/debug: Allow a single level of #DB recursion

Arvind Sankar (1):
      x86/cmdline: Disable jump tables for cmdline.c

Huang Ying (1):
      x86, fakenuma: Fix invalid starting node ID

Joerg Roedel (1):
      x86/mm/32: Bring back vmalloc faulting on x86_32

Peter Zijlstra (1):
      x86/entry: Fix AC assertion

Thomas Gleixner (1):
      x86/entry: Unbreak 32bit fast syscall

Vamshi K Sthambamkadi (1):
      tracing/kprobes, x86/ptrace: Fix regs argument order for i386


 arch/x86/entry/common.c             | 29 +++++++++-----
 arch/x86/include/asm/entry-common.h | 12 +++++-
 arch/x86/include/asm/ptrace.h       |  2 +-
 arch/x86/kernel/traps.c             | 65 +++++++++++++++----------------
 arch/x86/lib/Makefile               |  2 +-
 arch/x86/mm/fault.c                 | 78 +++++++++++++++++++++++++++++++++++++
 arch/x86/mm/numa_emulation.c        |  2 +-
 include/linux/entry-common.h        | 51 +++++++++++++++++++-----
 kernel/entry/common.c               | 35 ++++++++++++++---
 9 files changed, 213 insertions(+), 63 deletions(-)

diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index 48512c7944e7..2f84c7ca74ea 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -60,16 +60,10 @@ __visible noinstr void do_syscall_64(unsigned long nr, struct pt_regs *regs)
 #if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION)
 static __always_inline unsigned int syscall_32_enter(struct pt_regs *regs)
 {
-	unsigned int nr = (unsigned int)regs->orig_ax;
-
 	if (IS_ENABLED(CONFIG_IA32_EMULATION))
 		current_thread_info()->status |= TS_COMPAT;
-	/*
-	 * Subtlety here: if ptrace pokes something larger than 2^32-1 into
-	 * orig_ax, the unsigned int return value truncates it.  This may
-	 * or may not be necessary, but it matches the old asm behavior.
-	 */
-	return (unsigned int)syscall_enter_from_user_mode(regs, nr);
+
+	return (unsigned int)regs->orig_ax;
 }
 
 /*
@@ -91,15 +85,29 @@ __visible noinstr void do_int80_syscall_32(struct pt_regs *regs)
 {
 	unsigned int nr = syscall_32_enter(regs);
 
+	/*
+	 * Subtlety here: if ptrace pokes something larger than 2^32-1 into
+	 * orig_ax, the unsigned int return value truncates it.  This may
+	 * or may not be necessary, but it matches the old asm behavior.
+	 */
+	nr = (unsigned int)syscall_enter_from_user_mode(regs, nr);
+
 	do_syscall_32_irqs_on(regs, nr);
 	syscall_exit_to_user_mode(regs);
 }
 
 static noinstr bool __do_fast_syscall_32(struct pt_regs *regs)
 {
-	unsigned int nr	= syscall_32_enter(regs);
+	unsigned int nr = syscall_32_enter(regs);
 	int res;
 
+	/*
+	 * This cannot use syscall_enter_from_user_mode() as it has to
+	 * fetch EBP before invoking any of the syscall entry work
+	 * functions.
+	 */
+	syscall_enter_from_user_mode_prepare(regs);
+
 	instrumentation_begin();
 	/* Fetch EBP from where the vDSO stashed it. */
 	if (IS_ENABLED(CONFIG_X86_64)) {
@@ -122,6 +130,9 @@ static noinstr bool __do_fast_syscall_32(struct pt_regs *regs)
 		return false;
 	}
 
+	/* The case truncates any ptrace induced syscall nr > 2^32 -1 */
+	nr = (unsigned int)syscall_enter_from_user_mode_work(regs, nr);
+
 	/* Now this is just like a normal syscall. */
 	do_syscall_32_irqs_on(regs, nr);
 	syscall_exit_to_user_mode(regs);
diff --git a/arch/x86/include/asm/entry-common.h b/arch/x86/include/asm/entry-common.h
index a8f9315b9eae..6fe54b2813c1 100644
--- a/arch/x86/include/asm/entry-common.h
+++ b/arch/x86/include/asm/entry-common.h
@@ -18,8 +18,16 @@ static __always_inline void arch_check_user_regs(struct pt_regs *regs)
 		 * state, not the interrupt state as imagined by Xen.
 		 */
 		unsigned long flags = native_save_fl();
-		WARN_ON_ONCE(flags & (X86_EFLAGS_AC | X86_EFLAGS_DF |
-				      X86_EFLAGS_NT));
+		unsigned long mask = X86_EFLAGS_DF | X86_EFLAGS_NT;
+
+		/*
+		 * For !SMAP hardware we patch out CLAC on entry.
+		 */
+		if (boot_cpu_has(X86_FEATURE_SMAP) ||
+		    (IS_ENABLED(CONFIG_64_BIT) && boot_cpu_has(X86_FEATURE_XENPV)))
+			mask |= X86_EFLAGS_AC;
+
+		WARN_ON_ONCE(flags & mask);
 
 		/* We think we came from user mode. Make sure pt_regs agrees. */
 		WARN_ON_ONCE(!user_mode(regs));
diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h
index 40aa69d04862..d8324a236696 100644
--- a/arch/x86/include/asm/ptrace.h
+++ b/arch/x86/include/asm/ptrace.h
@@ -327,8 +327,8 @@ static inline unsigned long regs_get_kernel_argument(struct pt_regs *regs,
 	static const unsigned int argument_offs[] = {
 #ifdef __i386__
 		offsetof(struct pt_regs, ax),
-		offsetof(struct pt_regs, cx),
 		offsetof(struct pt_regs, dx),
+		offsetof(struct pt_regs, cx),
 #define NR_REG_ARGUMENTS 3
 #else
 		offsetof(struct pt_regs, di),
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 1f66d2d1e998..81a2fb711091 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -729,20 +729,9 @@ static bool is_sysenter_singlestep(struct pt_regs *regs)
 #endif
 }
 
-static __always_inline void debug_enter(unsigned long *dr6, unsigned long *dr7)
+static __always_inline unsigned long debug_read_clear_dr6(void)
 {
-	/*
-	 * Disable breakpoints during exception handling; recursive exceptions
-	 * are exceedingly 'fun'.
-	 *
-	 * Since this function is NOKPROBE, and that also applies to
-	 * HW_BREAKPOINT_X, we can't hit a breakpoint before this (XXX except a
-	 * HW_BREAKPOINT_W on our stack)
-	 *
-	 * Entry text is excluded for HW_BP_X and cpu_entry_area, which
-	 * includes the entry stack is excluded for everything.
-	 */
-	*dr7 = local_db_save();
+	unsigned long dr6;
 
 	/*
 	 * The Intel SDM says:
@@ -755,15 +744,12 @@ static __always_inline void debug_enter(unsigned long *dr6, unsigned long *dr7)
 	 *
 	 * Keep it simple: clear DR6 immediately.
 	 */
-	get_debugreg(*dr6, 6);
+	get_debugreg(dr6, 6);
 	set_debugreg(0, 6);
 	/* Filter out all the reserved bits which are preset to 1 */
-	*dr6 &= ~DR6_RESERVED;
-}
+	dr6 &= ~DR6_RESERVED;
 
-static __always_inline void debug_exit(unsigned long dr7)
-{
-	local_db_restore(dr7);
+	return dr6;
 }
 
 /*
@@ -863,6 +849,18 @@ static void handle_debug(struct pt_regs *regs, unsigned long dr6, bool user)
 static __always_inline void exc_debug_kernel(struct pt_regs *regs,
 					     unsigned long dr6)
 {
+	/*
+	 * Disable breakpoints during exception handling; recursive exceptions
+	 * are exceedingly 'fun'.
+	 *
+	 * Since this function is NOKPROBE, and that also applies to
+	 * HW_BREAKPOINT_X, we can't hit a breakpoint before this (XXX except a
+	 * HW_BREAKPOINT_W on our stack)
+	 *
+	 * Entry text is excluded for HW_BP_X and cpu_entry_area, which
+	 * includes the entry stack is excluded for everything.
+	 */
+	unsigned long dr7 = local_db_save();
 	bool irq_state = idtentry_enter_nmi(regs);
 	instrumentation_begin();
 
@@ -883,6 +881,8 @@ static __always_inline void exc_debug_kernel(struct pt_regs *regs,
 
 	instrumentation_end();
 	idtentry_exit_nmi(regs, irq_state);
+
+	local_db_restore(dr7);
 }
 
 static __always_inline void exc_debug_user(struct pt_regs *regs,
@@ -894,6 +894,15 @@ static __always_inline void exc_debug_user(struct pt_regs *regs,
 	 */
 	WARN_ON_ONCE(!user_mode(regs));
 
+	/*
+	 * NB: We can't easily clear DR7 here because
+	 * idtentry_exit_to_usermode() can invoke ptrace, schedule, access
+	 * user memory, etc.  This means that a recursive #DB is possible.  If
+	 * this happens, that #DB will hit exc_debug_kernel() and clear DR7.
+	 * Since we're not on the IST stack right now, everything will be
+	 * fine.
+	 */
+
 	irqentry_enter_from_user_mode(regs);
 	instrumentation_begin();
 
@@ -907,36 +916,24 @@ static __always_inline void exc_debug_user(struct pt_regs *regs,
 /* IST stack entry */
 DEFINE_IDTENTRY_DEBUG(exc_debug)
 {
-	unsigned long dr6, dr7;
-
-	debug_enter(&dr6, &dr7);
-	exc_debug_kernel(regs, dr6);
-	debug_exit(dr7);
+	exc_debug_kernel(regs, debug_read_clear_dr6());
 }
 
 /* User entry, runs on regular task stack */
 DEFINE_IDTENTRY_DEBUG_USER(exc_debug)
 {
-	unsigned long dr6, dr7;
-
-	debug_enter(&dr6, &dr7);
-	exc_debug_user(regs, dr6);
-	debug_exit(dr7);
+	exc_debug_user(regs, debug_read_clear_dr6());
 }
 #else
 /* 32 bit does not have separate entry points. */
 DEFINE_IDTENTRY_RAW(exc_debug)
 {
-	unsigned long dr6, dr7;
-
-	debug_enter(&dr6, &dr7);
+	unsigned long dr6 = debug_read_clear_dr6();
 
 	if (user_mode(regs))
 		exc_debug_user(regs, dr6);
 	else
 		exc_debug_kernel(regs, dr6);
-
-	debug_exit(dr7);
 }
 #endif
 
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
index d46fff11f06f..aa067859a70b 100644
--- a/arch/x86/lib/Makefile
+++ b/arch/x86/lib/Makefile
@@ -24,7 +24,7 @@ ifdef CONFIG_FUNCTION_TRACER
 CFLAGS_REMOVE_cmdline.o = -pg
 endif
 
-CFLAGS_cmdline.o := -fno-stack-protector
+CFLAGS_cmdline.o := -fno-stack-protector -fno-jump-tables
 endif
 
 inat_tables_script = $(srctree)/arch/x86/tools/gen-insn-attr-x86.awk
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 35f1498e9832..6e3e8a124903 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -190,6 +190,53 @@ static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address)
 	return pmd_k;
 }
 
+/*
+ *   Handle a fault on the vmalloc or module mapping area
+ *
+ *   This is needed because there is a race condition between the time
+ *   when the vmalloc mapping code updates the PMD to the point in time
+ *   where it synchronizes this update with the other page-tables in the
+ *   system.
+ *
+ *   In this race window another thread/CPU can map an area on the same
+ *   PMD, finds it already present and does not synchronize it with the
+ *   rest of the system yet. As a result v[mz]alloc might return areas
+ *   which are not mapped in every page-table in the system, causing an
+ *   unhandled page-fault when they are accessed.
+ */
+static noinline int vmalloc_fault(unsigned long address)
+{
+	unsigned long pgd_paddr;
+	pmd_t *pmd_k;
+	pte_t *pte_k;
+
+	/* Make sure we are in vmalloc area: */
+	if (!(address >= VMALLOC_START && address < VMALLOC_END))
+		return -1;
+
+	/*
+	 * Synchronize this task's top level page-table
+	 * with the 'reference' page table.
+	 *
+	 * Do _not_ use "current" here. We might be inside
+	 * an interrupt in the middle of a task switch..
+	 */
+	pgd_paddr = read_cr3_pa();
+	pmd_k = vmalloc_sync_one(__va(pgd_paddr), address);
+	if (!pmd_k)
+		return -1;
+
+	if (pmd_large(*pmd_k))
+		return 0;
+
+	pte_k = pte_offset_kernel(pmd_k, address);
+	if (!pte_present(*pte_k))
+		return -1;
+
+	return 0;
+}
+NOKPROBE_SYMBOL(vmalloc_fault);
+
 void arch_sync_kernel_mappings(unsigned long start, unsigned long end)
 {
 	unsigned long addr;
@@ -1110,6 +1157,37 @@ do_kern_addr_fault(struct pt_regs *regs, unsigned long hw_error_code,
 	 */
 	WARN_ON_ONCE(hw_error_code & X86_PF_PK);
 
+#ifdef CONFIG_X86_32
+	/*
+	 * We can fault-in kernel-space virtual memory on-demand. The
+	 * 'reference' page table is init_mm.pgd.
+	 *
+	 * NOTE! We MUST NOT take any locks for this case. We may
+	 * be in an interrupt or a critical region, and should
+	 * only copy the information from the master page table,
+	 * nothing more.
+	 *
+	 * Before doing this on-demand faulting, ensure that the
+	 * fault is not any of the following:
+	 * 1. A fault on a PTE with a reserved bit set.
+	 * 2. A fault caused by a user-mode access.  (Do not demand-
+	 *    fault kernel memory due to user-mode accesses).
+	 * 3. A fault caused by a page-level protection violation.
+	 *    (A demand fault would be on a non-present page which
+	 *     would have X86_PF_PROT==0).
+	 *
+	 * This is only needed to close a race condition on x86-32 in
+	 * the vmalloc mapping/unmapping code. See the comment above
+	 * vmalloc_fault() for details. On x86-64 the race does not
+	 * exist as the vmalloc mappings don't need to be synchronized
+	 * there.
+	 */
+	if (!(hw_error_code & (X86_PF_RSVD | X86_PF_USER | X86_PF_PROT))) {
+		if (vmalloc_fault(address) >= 0)
+			return;
+	}
+#endif
+
 	/* Was the fault spurious, caused by lazy TLB invalidation? */
 	if (spurious_kernel_fault(hw_error_code, address))
 		return;
diff --git a/arch/x86/mm/numa_emulation.c b/arch/x86/mm/numa_emulation.c
index c5174b4e318b..683cd12f4793 100644
--- a/arch/x86/mm/numa_emulation.c
+++ b/arch/x86/mm/numa_emulation.c
@@ -321,7 +321,7 @@ static int __init split_nodes_size_interleave(struct numa_meminfo *ei,
 					      u64 addr, u64 max_addr, u64 size)
 {
 	return split_nodes_size_interleave_uniform(ei, pi, addr, max_addr, size,
-			0, NULL, NUMA_NO_NODE);
+			0, NULL, 0);
 }
 
 static int __init setup_emu2phys_nid(int *dfl_phys_nid)
diff --git a/include/linux/entry-common.h b/include/linux/entry-common.h
index efebbffcd5cc..159c7476b11b 100644
--- a/include/linux/entry-common.h
+++ b/include/linux/entry-common.h
@@ -110,15 +110,30 @@ static inline __must_check int arch_syscall_enter_tracehook(struct pt_regs *regs
 #endif
 
 /**
- * syscall_enter_from_user_mode - Check and handle work before invoking
- *				 a syscall
+ * syscall_enter_from_user_mode_prepare - Establish state and enable interrupts
  * @regs:	Pointer to currents pt_regs
- * @syscall:	The syscall number
  *
  * Invoked from architecture specific syscall entry code with interrupts
  * disabled. The calling code has to be non-instrumentable. When the
- * function returns all state is correct and the subsequent functions can be
- * instrumented.
+ * function returns all state is correct, interrupts are enabled and the
+ * subsequent functions can be instrumented.
+ *
+ * This handles lockdep, RCU (context tracking) and tracing state.
+ *
+ * This is invoked when there is extra architecture specific functionality
+ * to be done between establishing state and handling user mode entry work.
+ */
+void syscall_enter_from_user_mode_prepare(struct pt_regs *regs);
+
+/**
+ * syscall_enter_from_user_mode_work - Check and handle work before invoking
+ *				       a syscall
+ * @regs:	Pointer to currents pt_regs
+ * @syscall:	The syscall number
+ *
+ * Invoked from architecture specific syscall entry code with interrupts
+ * enabled after invoking syscall_enter_from_user_mode_prepare() and extra
+ * architecture specific work.
  *
  * Returns: The original or a modified syscall number
  *
@@ -127,12 +142,30 @@ static inline __must_check int arch_syscall_enter_tracehook(struct pt_regs *regs
  * syscall_set_return_value() first.  If neither of those are called and -1
  * is returned, then the syscall will fail with ENOSYS.
  *
- * The following functionality is handled here:
+ * It handles the following work items:
  *
- *  1) Establish state (lockdep, RCU (context tracking), tracing)
- *  2) TIF flag dependent invocations of arch_syscall_enter_tracehook(),
+ *  1) TIF flag dependent invocations of arch_syscall_enter_tracehook(),
  *     __secure_computing(), trace_sys_enter()
- *  3) Invocation of audit_syscall_entry()
+ *  2) Invocation of audit_syscall_entry()
+ */
+long syscall_enter_from_user_mode_work(struct pt_regs *regs, long syscall);
+
+/**
+ * syscall_enter_from_user_mode - Establish state and check and handle work
+ *				  before invoking a syscall
+ * @regs:	Pointer to currents pt_regs
+ * @syscall:	The syscall number
+ *
+ * Invoked from architecture specific syscall entry code with interrupts
+ * disabled. The calling code has to be non-instrumentable. When the
+ * function returns all state is correct, interrupts are enabled and the
+ * subsequent functions can be instrumented.
+ *
+ * This is combination of syscall_enter_from_user_mode_prepare() and
+ * syscall_enter_from_user_mode_work().
+ *
+ * Returns: The original or a modified syscall number. See
+ * syscall_enter_from_user_mode_work() for further explanation.
  */
 long syscall_enter_from_user_mode(struct pt_regs *regs, long syscall);
 
diff --git a/kernel/entry/common.c b/kernel/entry/common.c
index fcae019158ca..18683598edbc 100644
--- a/kernel/entry/common.c
+++ b/kernel/entry/common.c
@@ -69,22 +69,45 @@ static long syscall_trace_enter(struct pt_regs *regs, long syscall,
 	return ret ? : syscall_get_nr(current, regs);
 }
 
-noinstr long syscall_enter_from_user_mode(struct pt_regs *regs, long syscall)
+static __always_inline long
+__syscall_enter_from_user_work(struct pt_regs *regs, long syscall)
 {
 	unsigned long ti_work;
 
-	enter_from_user_mode(regs);
-	instrumentation_begin();
-
-	local_irq_enable();
 	ti_work = READ_ONCE(current_thread_info()->flags);
 	if (ti_work & SYSCALL_ENTER_WORK)
 		syscall = syscall_trace_enter(regs, syscall, ti_work);
-	instrumentation_end();
 
 	return syscall;
 }
 
+long syscall_enter_from_user_mode_work(struct pt_regs *regs, long syscall)
+{
+	return __syscall_enter_from_user_work(regs, syscall);
+}
+
+noinstr long syscall_enter_from_user_mode(struct pt_regs *regs, long syscall)
+{
+	long ret;
+
+	enter_from_user_mode(regs);
+
+	instrumentation_begin();
+	local_irq_enable();
+	ret = __syscall_enter_from_user_work(regs, syscall);
+	instrumentation_end();
+
+	return ret;
+}
+
+noinstr void syscall_enter_from_user_mode_prepare(struct pt_regs *regs)
+{
+	enter_from_user_mode(regs);
+	instrumentation_begin();
+	local_irq_enable();
+	instrumentation_end();
+}
+
 /**
  * exit_to_user_mode - Fixup state when exiting to user mode
  *

^ permalink raw reply	[flat|nested] 539+ messages in thread

* Re: [GIT PULL] x86 fixes
  2020-08-15 11:45 Ingo Molnar
@ 2020-08-16  1:55 ` pr-tracker-bot
  0 siblings, 0 replies; 539+ messages in thread
From: pr-tracker-bot @ 2020-08-16  1:55 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Linus Torvalds, linux-kernel, Thomas Gleixner, Borislav Petkov,
	Peter Zijlstra, Andrew Morton

The pull request you sent on Sat, 15 Aug 2020 13:45:52 +0200:

> git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-2020-08-15

has been merged into torvalds/linux.git:
https://git.kernel.org/torvalds/c/50f6c7dbd973092d8e5f3c89f29eb4bea19fdebd

Thank you!

-- 
Deet-doot-dot, I am a bot.
https://korg.docs.kernel.org/prtracker.html

^ permalink raw reply	[flat|nested] 539+ messages in thread

* [GIT PULL] x86 fixes
@ 2020-08-15 11:45 Ingo Molnar
  2020-08-16  1:55 ` pr-tracker-bot
  0 siblings, 1 reply; 539+ messages in thread
From: Ingo Molnar @ 2020-08-15 11:45 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Thomas Gleixner, Borislav Petkov, Peter Zijlstra,
	Andrew Morton

Linus,

Please pull the latest x86/urgent git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-2020-08-15

   # HEAD: a6d996cbd38b42341ad3fce74506b9fdc280e395 x86/alternatives: Acquire pte lock with interrupts enabled

Misc fixes and small updates all around the place:

 - Fix mitigation state sysfs output
 - Fix an FPU xstate/sxave code assumption bug triggered by Architectural LBR support
 - Fix Lightning Mountain SoC TSC frequency enumeration bug
 - Fix kexec debug output
 - Fix kexec memory range assumption bug
 - Fix a boundary condition in the crash kernel code

 - Optimize porgatory.ro generation a bit
 - Enable ACRN guests to use X2APIC mode
 - Reduce a __text_poke() IRQs-off critical section for the benefit of PREEMPT_RT

 Thanks,

	Ingo

------------------>
Dilip Kota (1):
      x86/tsr: Fix tsc frequency enumeration bug on Lightning Mountain SoC

Kan Liang (1):
      x86/fpu/xstate: Fix an xstate size check warning with architectural LBRs

Lianbo Jiang (3):
      x86/crash: Correct the address boundary of function parameters
      kexec: Improve & fix crash_exclude_mem_range() to handle overlapping ranges
      kexec_file: Correctly output debugging information for the PT_LOAD ELF header

Pawan Gupta (1):
      x86/bugs/multihit: Fix mitigation reporting when VMX is not in use

Pingfan Liu (1):
      x86/purgatory: Don't generate debug info for purgatory.ro

Sebastian Andrzej Siewior (1):
      x86/alternatives: Acquire pte lock with interrupts enabled

Shuo Liu (2):
      x86/acrn: Allow ACRN guest to use X2APIC mode
      x86/acrn: Remove redundant chars from ACRN signature


 Documentation/admin-guide/hw-vuln/multihit.rst |  4 +++
 arch/x86/kernel/alternative.c                  |  6 ++--
 arch/x86/kernel/cpu/acrn.c                     | 12 +++-----
 arch/x86/kernel/cpu/bugs.c                     |  8 ++++-
 arch/x86/kernel/crash.c                        |  2 +-
 arch/x86/kernel/fpu/xstate.c                   | 33 ++++++++++++++++++++-
 arch/x86/kernel/tsc_msr.c                      |  9 ++++--
 arch/x86/purgatory/Makefile                    |  5 +++-
 kernel/kexec_file.c                            | 41 ++++++++++++++++----------
 9 files changed, 88 insertions(+), 32 deletions(-)

diff --git a/Documentation/admin-guide/hw-vuln/multihit.rst b/Documentation/admin-guide/hw-vuln/multihit.rst
index ba9988d8bce5..140e4cec38c3 100644
--- a/Documentation/admin-guide/hw-vuln/multihit.rst
+++ b/Documentation/admin-guide/hw-vuln/multihit.rst
@@ -80,6 +80,10 @@ The possible values in this file are:
        - The processor is not vulnerable.
      * - KVM: Mitigation: Split huge pages
        - Software changes mitigate this issue.
+     * - KVM: Mitigation: VMX unsupported
+       - KVM is not vulnerable because Virtual Machine Extensions (VMX) is not supported.
+     * - KVM: Mitigation: VMX disabled
+       - KVM is not vulnerable because Virtual Machine Extensions (VMX) is disabled.
      * - KVM: Vulnerable
        - The processor is vulnerable, but no mitigation enabled
 
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index c826cddae157..34a1b8562c31 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -874,8 +874,6 @@ static void *__text_poke(void *addr, const void *opcode, size_t len)
 	 */
 	BUG_ON(!pages[0] || (cross_page_boundary && !pages[1]));
 
-	local_irq_save(flags);
-
 	/*
 	 * Map the page without the global bit, as TLB flushing is done with
 	 * flush_tlb_mm_range(), which is intended for non-global PTEs.
@@ -892,6 +890,8 @@ static void *__text_poke(void *addr, const void *opcode, size_t len)
 	 */
 	VM_BUG_ON(!ptep);
 
+	local_irq_save(flags);
+
 	pte = mk_pte(pages[0], pgprot);
 	set_pte_at(poking_mm, poking_addr, ptep, pte);
 
@@ -941,8 +941,8 @@ static void *__text_poke(void *addr, const void *opcode, size_t len)
 	 */
 	BUG_ON(memcmp(addr, opcode, len));
 
-	pte_unmap_unlock(ptep, ptl);
 	local_irq_restore(flags);
+	pte_unmap_unlock(ptep, ptl);
 	return addr;
 }
 
diff --git a/arch/x86/kernel/cpu/acrn.c b/arch/x86/kernel/cpu/acrn.c
index 1da9b1c9a2db..0b2c03943ac6 100644
--- a/arch/x86/kernel/cpu/acrn.c
+++ b/arch/x86/kernel/cpu/acrn.c
@@ -11,14 +11,15 @@
 
 #include <linux/interrupt.h>
 #include <asm/apic.h>
+#include <asm/cpufeatures.h>
 #include <asm/desc.h>
 #include <asm/hypervisor.h>
 #include <asm/idtentry.h>
 #include <asm/irq_regs.h>
 
-static uint32_t __init acrn_detect(void)
+static u32 __init acrn_detect(void)
 {
-	return hypervisor_cpuid_base("ACRNACRNACRN\0\0", 0);
+	return hypervisor_cpuid_base("ACRNACRNACRN", 0);
 }
 
 static void __init acrn_init_platform(void)
@@ -29,12 +30,7 @@ static void __init acrn_init_platform(void)
 
 static bool acrn_x2apic_available(void)
 {
-	/*
-	 * x2apic is not supported for now. Future enablement will have to check
-	 * X86_FEATURE_X2APIC to determine whether x2apic is supported in the
-	 * guest.
-	 */
-	return false;
+	return boot_cpu_has(X86_FEATURE_X2APIC);
 }
 
 static void (*acrn_intr_handler)(void);
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index f0b743a2fe9c..d3f0db463f96 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -31,6 +31,7 @@
 #include <asm/intel-family.h>
 #include <asm/e820/api.h>
 #include <asm/hypervisor.h>
+#include <asm/tlbflush.h>
 
 #include "cpu.h"
 
@@ -1549,7 +1550,12 @@ static ssize_t l1tf_show_state(char *buf)
 
 static ssize_t itlb_multihit_show_state(char *buf)
 {
-	if (itlb_multihit_kvm_mitigation)
+	if (!boot_cpu_has(X86_FEATURE_MSR_IA32_FEAT_CTL) ||
+	    !boot_cpu_has(X86_FEATURE_VMX))
+		return sprintf(buf, "KVM: Mitigation: VMX unsupported\n");
+	else if (!(cr4_read_shadow() & X86_CR4_VMXE))
+		return sprintf(buf, "KVM: Mitigation: VMX disabled\n");
+	else if (itlb_multihit_kvm_mitigation)
 		return sprintf(buf, "KVM: Mitigation: Split huge pages\n");
 	else
 		return sprintf(buf, "KVM: Vulnerable\n");
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index fd87b59452a3..a8f3af257e26 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -230,7 +230,7 @@ static int elf_header_exclude_ranges(struct crash_mem *cmem)
 	int ret = 0;
 
 	/* Exclude the low 1M because it is always reserved */
-	ret = crash_exclude_mem_range(cmem, 0, 1<<20);
+	ret = crash_exclude_mem_range(cmem, 0, (1<<20)-1);
 	if (ret)
 		return ret;
 
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index be2a68a09d19..6073e342a1ed 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -611,6 +611,10 @@ static void check_xstate_against_struct(int nr)
  * This essentially double-checks what the cpu told us about
  * how large the XSAVE buffer needs to be.  We are recalculating
  * it to be safe.
+ *
+ * Dynamic XSAVE features allocate their own buffers and are not
+ * covered by these checks. Only the size of the buffer for task->fpu
+ * is checked here.
  */
 static void do_extra_xstate_size_checks(void)
 {
@@ -673,6 +677,33 @@ static unsigned int __init get_xsaves_size(void)
 	return ebx;
 }
 
+/*
+ * Get the total size of the enabled xstates without the dynamic supervisor
+ * features.
+ */
+static unsigned int __init get_xsaves_size_no_dynamic(void)
+{
+	u64 mask = xfeatures_mask_dynamic();
+	unsigned int size;
+
+	if (!mask)
+		return get_xsaves_size();
+
+	/* Disable dynamic features. */
+	wrmsrl(MSR_IA32_XSS, xfeatures_mask_supervisor());
+
+	/*
+	 * Ask the hardware what size is required of the buffer.
+	 * This is the size required for the task->fpu buffer.
+	 */
+	size = get_xsaves_size();
+
+	/* Re-enable dynamic features so XSAVES will work on them again. */
+	wrmsrl(MSR_IA32_XSS, xfeatures_mask_supervisor() | mask);
+
+	return size;
+}
+
 static unsigned int __init get_xsave_size(void)
 {
 	unsigned int eax, ebx, ecx, edx;
@@ -710,7 +741,7 @@ static int __init init_xstate_size(void)
 	xsave_size = get_xsave_size();
 
 	if (boot_cpu_has(X86_FEATURE_XSAVES))
-		possible_xstate_size = get_xsaves_size();
+		possible_xstate_size = get_xsaves_size_no_dynamic();
 	else
 		possible_xstate_size = xsave_size;
 
diff --git a/arch/x86/kernel/tsc_msr.c b/arch/x86/kernel/tsc_msr.c
index 4fec6f3a1858..a654a9b4b77c 100644
--- a/arch/x86/kernel/tsc_msr.c
+++ b/arch/x86/kernel/tsc_msr.c
@@ -133,10 +133,15 @@ static const struct freq_desc freq_desc_ann = {
 	.mask = 0x0f,
 };
 
-/* 24 MHz crystal? : 24 * 13 / 4 = 78 MHz */
+/*
+ * 24 MHz crystal? : 24 * 13 / 4 = 78 MHz
+ * Frequency step for Lightning Mountain SoC is fixed to 78 MHz,
+ * so all the frequency entries are 78000.
+ */
 static const struct freq_desc freq_desc_lgm = {
 	.use_msr_plat = true,
-	.freqs = { 78000, 78000, 78000, 78000, 78000, 78000, 78000, 78000 },
+	.freqs = { 78000, 78000, 78000, 78000, 78000, 78000, 78000, 78000,
+		   78000, 78000, 78000, 78000, 78000, 78000, 78000, 78000 },
 	.mask = 0x0f,
 };
 
diff --git a/arch/x86/purgatory/Makefile b/arch/x86/purgatory/Makefile
index 088bd764e0b7..d24b43a4451a 100644
--- a/arch/x86/purgatory/Makefile
+++ b/arch/x86/purgatory/Makefile
@@ -32,7 +32,7 @@ KCOV_INSTRUMENT := n
 # make up the standalone purgatory.ro
 
 PURGATORY_CFLAGS_REMOVE := -mcmodel=kernel
-PURGATORY_CFLAGS := -mcmodel=large -ffreestanding -fno-zero-initialized-in-bss
+PURGATORY_CFLAGS := -mcmodel=large -ffreestanding -fno-zero-initialized-in-bss -g0
 PURGATORY_CFLAGS += $(DISABLE_STACKLEAK_PLUGIN) -DDISABLE_BRANCH_PROFILING
 PURGATORY_CFLAGS += $(call cc-option,-fno-stack-protector)
 
@@ -64,6 +64,9 @@ CFLAGS_sha256.o			+= $(PURGATORY_CFLAGS)
 CFLAGS_REMOVE_string.o		+= $(PURGATORY_CFLAGS_REMOVE)
 CFLAGS_string.o			+= $(PURGATORY_CFLAGS)
 
+AFLAGS_REMOVE_setup-x86_$(BITS).o	+= -Wa,-gdwarf-2
+AFLAGS_REMOVE_entry64.o			+= -Wa,-gdwarf-2
+
 $(obj)/purgatory.ro: $(PURGATORY_OBJS) FORCE
 		$(call if_changed,ld)
 
diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c
index 94661d2d13ad..3f7867c1820f 100644
--- a/kernel/kexec_file.c
+++ b/kernel/kexec_file.c
@@ -1157,24 +1157,26 @@ int crash_exclude_mem_range(struct crash_mem *mem,
 			    unsigned long long mstart, unsigned long long mend)
 {
 	int i, j;
-	unsigned long long start, end;
+	unsigned long long start, end, p_start, p_end;
 	struct crash_mem_range temp_range = {0, 0};
 
 	for (i = 0; i < mem->nr_ranges; i++) {
 		start = mem->ranges[i].start;
 		end = mem->ranges[i].end;
+		p_start = mstart;
+		p_end = mend;
 
 		if (mstart > end || mend < start)
 			continue;
 
 		/* Truncate any area outside of range */
 		if (mstart < start)
-			mstart = start;
+			p_start = start;
 		if (mend > end)
-			mend = end;
+			p_end = end;
 
 		/* Found completely overlapping range */
-		if (mstart == start && mend == end) {
+		if (p_start == start && p_end == end) {
 			mem->ranges[i].start = 0;
 			mem->ranges[i].end = 0;
 			if (i < mem->nr_ranges - 1) {
@@ -1185,20 +1187,29 @@ int crash_exclude_mem_range(struct crash_mem *mem,
 					mem->ranges[j].end =
 							mem->ranges[j+1].end;
 				}
+
+				/*
+				 * Continue to check if there are another overlapping ranges
+				 * from the current position because of shifting the above
+				 * mem ranges.
+				 */
+				i--;
+				mem->nr_ranges--;
+				continue;
 			}
 			mem->nr_ranges--;
 			return 0;
 		}
 
-		if (mstart > start && mend < end) {
+		if (p_start > start && p_end < end) {
 			/* Split original range */
-			mem->ranges[i].end = mstart - 1;
-			temp_range.start = mend + 1;
+			mem->ranges[i].end = p_start - 1;
+			temp_range.start = p_end + 1;
 			temp_range.end = end;
-		} else if (mstart != start)
-			mem->ranges[i].end = mstart - 1;
+		} else if (p_start != start)
+			mem->ranges[i].end = p_start - 1;
 		else
-			mem->ranges[i].start = mend + 1;
+			mem->ranges[i].start = p_end + 1;
 		break;
 	}
 
@@ -1235,7 +1246,7 @@ int crash_prepare_elf64_headers(struct crash_mem *mem, int kernel_map,
 	unsigned long long notes_addr;
 	unsigned long mstart, mend;
 
-	/* extra phdr for vmcoreinfo elf note */
+	/* extra phdr for vmcoreinfo ELF note */
 	nr_phdr = nr_cpus + 1;
 	nr_phdr += mem->nr_ranges;
 
@@ -1243,7 +1254,7 @@ int crash_prepare_elf64_headers(struct crash_mem *mem, int kernel_map,
 	 * kexec-tools creates an extra PT_LOAD phdr for kernel text mapping
 	 * area (for example, ffffffff80000000 - ffffffffa0000000 on x86_64).
 	 * I think this is required by tools like gdb. So same physical
-	 * memory will be mapped in two elf headers. One will contain kernel
+	 * memory will be mapped in two ELF headers. One will contain kernel
 	 * text virtual addresses and other will have __va(physical) addresses.
 	 */
 
@@ -1270,7 +1281,7 @@ int crash_prepare_elf64_headers(struct crash_mem *mem, int kernel_map,
 	ehdr->e_ehsize = sizeof(Elf64_Ehdr);
 	ehdr->e_phentsize = sizeof(Elf64_Phdr);
 
-	/* Prepare one phdr of type PT_NOTE for each present cpu */
+	/* Prepare one phdr of type PT_NOTE for each present CPU */
 	for_each_present_cpu(cpu) {
 		phdr->p_type = PT_NOTE;
 		notes_addr = per_cpu_ptr_to_phys(per_cpu_ptr(crash_notes, cpu));
@@ -1312,10 +1323,10 @@ int crash_prepare_elf64_headers(struct crash_mem *mem, int kernel_map,
 		phdr->p_filesz = phdr->p_memsz = mend - mstart + 1;
 		phdr->p_align = 0;
 		ehdr->e_phnum++;
-		phdr++;
-		pr_debug("Crash PT_LOAD elf header. phdr=%p vaddr=0x%llx, paddr=0x%llx, sz=0x%llx e_phnum=%d p_offset=0x%llx\n",
+		pr_debug("Crash PT_LOAD ELF header. phdr=%p vaddr=0x%llx, paddr=0x%llx, sz=0x%llx e_phnum=%d p_offset=0x%llx\n",
 			phdr, phdr->p_vaddr, phdr->p_paddr, phdr->p_filesz,
 			ehdr->e_phnum, phdr->p_offset);
+		phdr++;
 	}
 
 	*addr = buf;

^ permalink raw reply	[flat|nested] 539+ messages in thread

* Re: [GIT PULL] x86 fixes
  2020-07-25 11:46 Ingo Molnar
@ 2020-07-25 22:30 ` pr-tracker-bot
  0 siblings, 0 replies; 539+ messages in thread
From: pr-tracker-bot @ 2020-07-25 22:30 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Linus Torvalds, linux-kernel, Thomas Gleixner, Borislav Petkov,
	Peter Zijlstra

The pull request you sent on Sat, 25 Jul 2020 13:46:37 +0200:

> git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-2020-07-25

has been merged into torvalds/linux.git:
https://git.kernel.org/torvalds/c/fbe0d451bcea569fc0ed3455511a90646c8a9c81

Thank you!

-- 
Deet-doot-dot, I am a bot.
https://korg.wiki.kernel.org/userdoc/prtracker

^ permalink raw reply	[flat|nested] 539+ messages in thread

* [GIT PULL] x86 fixes
@ 2020-07-25 11:46 Ingo Molnar
  2020-07-25 22:30 ` pr-tracker-bot
  0 siblings, 1 reply; 539+ messages in thread
From: Ingo Molnar @ 2020-07-25 11:46 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Thomas Gleixner, Borislav Petkov, Peter Zijlstra

Linus,

Please pull the latest x86/urgent git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-2020-07-25

   # HEAD: d181d2da0141371bbc360eaea78719203e165e1c x86/dumpstack: Dump user space code correctly again

Misc fixes:

 - Fix a section end page alignment assumption that was causing crashes

 - Fix ORC unwinding on freshly forked tasks which haven't executed yet
   and which have empty user task stacks

 - Fix the debug.exception-trace=1 sysctl dumping of user stacks, which
   was broken by recent maccess changes


  out-of-topic modifications in x86-urgent-2020-07-25:
  ------------------------------------------------------
  include/asm-generic/vmlinux.lds.h  # de2b41be8fcc: x86, vmlinux.lds: Page-align

 Thanks,

	Ingo

------------------>
Joerg Roedel (1):
      x86, vmlinux.lds: Page-align end of ..page_aligned sections

Josh Poimboeuf (2):
      x86/unwind/orc: Fix ORC for newly forked tasks
      x86/stacktrace: Fix reliable check for empty user task stacks

Thomas Gleixner (1):
      x86/dumpstack: Dump user space code correctly again


 arch/x86/kernel/dumpstack.c       | 27 +++++++++++++++++----------
 arch/x86/kernel/stacktrace.c      |  5 -----
 arch/x86/kernel/unwind_orc.c      |  8 ++++++--
 arch/x86/kernel/vmlinux.lds.S     |  1 +
 include/asm-generic/vmlinux.lds.h |  5 ++++-
 5 files changed, 28 insertions(+), 18 deletions(-)

diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index b037cfa7c0c5..7401cc12c3cc 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -71,6 +71,22 @@ static void printk_stack_address(unsigned long address, int reliable,
 	printk("%s %s%pB\n", log_lvl, reliable ? "" : "? ", (void *)address);
 }
 
+static int copy_code(struct pt_regs *regs, u8 *buf, unsigned long src,
+		     unsigned int nbytes)
+{
+	if (!user_mode(regs))
+		return copy_from_kernel_nofault(buf, (u8 *)src, nbytes);
+
+	/*
+	 * Make sure userspace isn't trying to trick us into dumping kernel
+	 * memory by pointing the userspace instruction pointer at it.
+	 */
+	if (__chk_range_not_ok(src, nbytes, TASK_SIZE_MAX))
+		return -EINVAL;
+
+	return copy_from_user_nmi(buf, (void __user *)src, nbytes);
+}
+
 /*
  * There are a couple of reasons for the 2/3rd prologue, courtesy of Linus:
  *
@@ -97,17 +113,8 @@ void show_opcodes(struct pt_regs *regs, const char *loglvl)
 #define OPCODE_BUFSIZE (PROLOGUE_SIZE + 1 + EPILOGUE_SIZE)
 	u8 opcodes[OPCODE_BUFSIZE];
 	unsigned long prologue = regs->ip - PROLOGUE_SIZE;
-	bool bad_ip;
-
-	/*
-	 * Make sure userspace isn't trying to trick us into dumping kernel
-	 * memory by pointing the userspace instruction pointer at it.
-	 */
-	bad_ip = user_mode(regs) &&
-		__chk_range_not_ok(prologue, OPCODE_BUFSIZE, TASK_SIZE_MAX);
 
-	if (bad_ip || copy_from_kernel_nofault(opcodes, (u8 *)prologue,
-					OPCODE_BUFSIZE)) {
+	if (copy_code(regs, opcodes, prologue, sizeof(opcodes))) {
 		printk("%sCode: Bad RIP value.\n", loglvl);
 	} else {
 		printk("%sCode: %" __stringify(PROLOGUE_SIZE) "ph <%02x> %"
diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c
index 6ad43fc44556..2fd698e28e4d 100644
--- a/arch/x86/kernel/stacktrace.c
+++ b/arch/x86/kernel/stacktrace.c
@@ -58,7 +58,6 @@ int arch_stack_walk_reliable(stack_trace_consume_fn consume_entry,
 			 * or a page fault), which can make frame pointers
 			 * unreliable.
 			 */
-
 			if (IS_ENABLED(CONFIG_FRAME_POINTER))
 				return -EINVAL;
 		}
@@ -81,10 +80,6 @@ int arch_stack_walk_reliable(stack_trace_consume_fn consume_entry,
 	if (unwind_error(&state))
 		return -EINVAL;
 
-	/* Success path for non-user tasks, i.e. kthreads and idle tasks */
-	if (!(task->flags & (PF_KTHREAD | PF_IDLE)))
-		return -EINVAL;
-
 	return 0;
 }
 
diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c
index 7f969b2d240f..ec88bbe08a32 100644
--- a/arch/x86/kernel/unwind_orc.c
+++ b/arch/x86/kernel/unwind_orc.c
@@ -440,8 +440,11 @@ bool unwind_next_frame(struct unwind_state *state)
 	/*
 	 * Find the orc_entry associated with the text address.
 	 *
-	 * Decrement call return addresses by one so they work for sibling
-	 * calls and calls to noreturn functions.
+	 * For a call frame (as opposed to a signal frame), state->ip points to
+	 * the instruction after the call.  That instruction's stack layout
+	 * could be different from the call instruction's layout, for example
+	 * if the call was to a noreturn function.  So get the ORC data for the
+	 * call instruction itself.
 	 */
 	orc = orc_find(state->signal ? state->ip : state->ip - 1);
 	if (!orc) {
@@ -662,6 +665,7 @@ void __unwind_start(struct unwind_state *state, struct task_struct *task,
 		state->sp = task->thread.sp;
 		state->bp = READ_ONCE_NOCHECK(frame->bp);
 		state->ip = READ_ONCE_NOCHECK(frame->ret_addr);
+		state->signal = (void *)state->ip == ret_from_fork;
 	}
 
 	if (get_stack_info((unsigned long *)state->sp, state->task,
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index 3bfc8dd8a43d..9a03e5b23135 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -358,6 +358,7 @@ SECTIONS
 	.bss : AT(ADDR(.bss) - LOAD_OFFSET) {
 		__bss_start = .;
 		*(.bss..page_aligned)
+		. = ALIGN(PAGE_SIZE);
 		*(BSS_MAIN)
 		BSS_DECRYPTED
 		. = ALIGN(PAGE_SIZE);
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index db600ef218d7..052e0f05a984 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -341,7 +341,8 @@
 
 #define PAGE_ALIGNED_DATA(page_align)					\
 	. = ALIGN(page_align);						\
-	*(.data..page_aligned)
+	*(.data..page_aligned)						\
+	. = ALIGN(page_align);
 
 #define READ_MOSTLY_DATA(align)						\
 	. = ALIGN(align);						\
@@ -737,7 +738,9 @@
 	. = ALIGN(bss_align);						\
 	.bss : AT(ADDR(.bss) - LOAD_OFFSET) {				\
 		BSS_FIRST_SECTIONS					\
+		. = ALIGN(PAGE_SIZE);					\
 		*(.bss..page_aligned)					\
+		. = ALIGN(PAGE_SIZE);					\
 		*(.dynbss)						\
 		*(BSS_MAIN)						\
 		*(COMMON)						\

^ permalink raw reply	[flat|nested] 539+ messages in thread

* Re: [GIT PULL] x86 fixes
  2020-03-02  8:49 Ingo Molnar
@ 2020-03-03 23:35 ` pr-tracker-bot
  0 siblings, 0 replies; 539+ messages in thread
From: pr-tracker-bot @ 2020-03-03 23:35 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Linus Torvalds, linux-kernel, Thomas Gleixner, Borislav Petkov,
	Andrew Morton, Peter Zijlstra

The pull request you sent on Mon, 2 Mar 2020 09:49:54 +0100:

> git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus

has been merged into torvalds/linux.git:
https://git.kernel.org/torvalds/c/2873dc25477f483997c99981cf14b43a0d4f84d4

Thank you!

-- 
Deet-doot-dot, I am a bot.
https://korg.wiki.kernel.org/userdoc/prtracker

^ permalink raw reply	[flat|nested] 539+ messages in thread

* [GIT PULL] x86 fixes
@ 2020-03-02  8:49 Ingo Molnar
  2020-03-03 23:35 ` pr-tracker-bot
  0 siblings, 1 reply; 539+ messages in thread
From: Ingo Molnar @ 2020-03-02  8:49 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Thomas Gleixner, Borislav Petkov, Andrew Morton,
	Peter Zijlstra

Linus,

Please pull the latest x86-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus

   # HEAD: bba42affa732d6fd5bd5c9678e6deacde2de1547 x86/mm: Fix dump_pagetables with Xen PV

Misc fixes: a pkeys fix for a bug that triggers with weird BIOS settings, 
and two Xen PV fixes: a paravirt interface fix, and pagetable dumping 
fix.

 Thanks,

	Ingo

------------------>
Juergen Gross (2):
      x86/ioperm: Add new paravirt function update_io_bitmap()
      x86/mm: Fix dump_pagetables with Xen PV

Sean Christopherson (1):
      x86/pkeys: Manually set X86_FEATURE_OSPKE to preserve existing changes


 arch/x86/include/asm/io_bitmap.h      |  9 ++++++++-
 arch/x86/include/asm/paravirt.h       |  7 +++++++
 arch/x86/include/asm/paravirt_types.h |  4 ++++
 arch/x86/kernel/cpu/common.c          |  2 +-
 arch/x86/kernel/paravirt.c            |  5 +++++
 arch/x86/kernel/process.c             |  2 +-
 arch/x86/mm/dump_pagetables.c         |  7 +------
 arch/x86/xen/enlighten_pv.c           | 25 +++++++++++++++++++++++++
 8 files changed, 52 insertions(+), 9 deletions(-)

diff --git a/arch/x86/include/asm/io_bitmap.h b/arch/x86/include/asm/io_bitmap.h
index 02c6ef8f7667..07344d82e88e 100644
--- a/arch/x86/include/asm/io_bitmap.h
+++ b/arch/x86/include/asm/io_bitmap.h
@@ -19,7 +19,14 @@ struct task_struct;
 void io_bitmap_share(struct task_struct *tsk);
 void io_bitmap_exit(void);
 
-void tss_update_io_bitmap(void);
+void native_tss_update_io_bitmap(void);
+
+#ifdef CONFIG_PARAVIRT_XXL
+#include <asm/paravirt.h>
+#else
+#define tss_update_io_bitmap native_tss_update_io_bitmap
+#endif
+
 #else
 static inline void io_bitmap_share(struct task_struct *tsk) { }
 static inline void io_bitmap_exit(void) { }
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 86e7317eb31f..694d8daf4983 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -295,6 +295,13 @@ static inline void write_idt_entry(gate_desc *dt, int entry, const gate_desc *g)
 	PVOP_VCALL3(cpu.write_idt_entry, dt, entry, g);
 }
 
+#ifdef CONFIG_X86_IOPL_IOPERM
+static inline void tss_update_io_bitmap(void)
+{
+	PVOP_VCALL0(cpu.update_io_bitmap);
+}
+#endif
+
 static inline void paravirt_activate_mm(struct mm_struct *prev,
 					struct mm_struct *next)
 {
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index 84812964d3dd..732f62e04ddb 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -140,6 +140,10 @@ struct pv_cpu_ops {
 
 	void (*load_sp0)(unsigned long sp0);
 
+#ifdef CONFIG_X86_IOPL_IOPERM
+	void (*update_io_bitmap)(void);
+#endif
+
 	void (*wbinvd)(void);
 
 	/* cpuid emulation, mostly so that caps bits can be disabled */
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 52c9bfbbdb2a..4cdb123ff66a 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -445,7 +445,7 @@ static __always_inline void setup_pku(struct cpuinfo_x86 *c)
 	 * cpuid bit to be set.  We need to ensure that we
 	 * update that bit in this CPU's "cpu_info".
 	 */
-	get_cpu_cap(c);
+	set_cpu_cap(c, X86_FEATURE_OSPKE);
 }
 
 #ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 789f5e4f89de..c131ba4e70ef 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -30,6 +30,7 @@
 #include <asm/timer.h>
 #include <asm/special_insns.h>
 #include <asm/tlb.h>
+#include <asm/io_bitmap.h>
 
 /*
  * nop stub, which must not clobber anything *including the stack* to
@@ -341,6 +342,10 @@ struct paravirt_patch_template pv_ops = {
 	.cpu.iret		= native_iret,
 	.cpu.swapgs		= native_swapgs,
 
+#ifdef CONFIG_X86_IOPL_IOPERM
+	.cpu.update_io_bitmap	= native_tss_update_io_bitmap,
+#endif
+
 	.cpu.start_context_switch	= paravirt_nop,
 	.cpu.end_context_switch		= paravirt_nop,
 
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 839b5244e3b7..3053c85e0e42 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -374,7 +374,7 @@ static void tss_copy_io_bitmap(struct tss_struct *tss, struct io_bitmap *iobm)
 /**
  * tss_update_io_bitmap - Update I/O bitmap before exiting to usermode
  */
-void tss_update_io_bitmap(void)
+void native_tss_update_io_bitmap(void)
 {
 	struct tss_struct *tss = this_cpu_ptr(&cpu_tss_rw);
 	struct thread_struct *t = &current->thread;
diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c
index 64229dad7eab..69309cd56fdf 100644
--- a/arch/x86/mm/dump_pagetables.c
+++ b/arch/x86/mm/dump_pagetables.c
@@ -363,13 +363,8 @@ static void ptdump_walk_pgd_level_core(struct seq_file *m,
 {
 	const struct ptdump_range ptdump_ranges[] = {
 #ifdef CONFIG_X86_64
-
-#define normalize_addr_shift (64 - (__VIRTUAL_MASK_SHIFT + 1))
-#define normalize_addr(u) ((signed long)((u) << normalize_addr_shift) >> \
-			   normalize_addr_shift)
-
 	{0, PTRS_PER_PGD * PGD_LEVEL_MULT / 2},
-	{normalize_addr(PTRS_PER_PGD * PGD_LEVEL_MULT / 2), ~0UL},
+	{GUARD_HOLE_END_ADDR, ~0UL},
 #else
 	{0, ~0UL},
 #endif
diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index 79409120a603..507f4fb88fa7 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -72,6 +72,9 @@
 #include <asm/mwait.h>
 #include <asm/pci_x86.h>
 #include <asm/cpu.h>
+#ifdef CONFIG_X86_IOPL_IOPERM
+#include <asm/io_bitmap.h>
+#endif
 
 #ifdef CONFIG_ACPI
 #include <linux/acpi.h>
@@ -837,6 +840,25 @@ static void xen_load_sp0(unsigned long sp0)
 	this_cpu_write(cpu_tss_rw.x86_tss.sp0, sp0);
 }
 
+#ifdef CONFIG_X86_IOPL_IOPERM
+static void xen_update_io_bitmap(void)
+{
+	struct physdev_set_iobitmap iobitmap;
+	struct tss_struct *tss = this_cpu_ptr(&cpu_tss_rw);
+
+	native_tss_update_io_bitmap();
+
+	iobitmap.bitmap = (uint8_t *)(&tss->x86_tss) +
+			  tss->x86_tss.io_bitmap_base;
+	if (tss->x86_tss.io_bitmap_base == IO_BITMAP_OFFSET_INVALID)
+		iobitmap.nr_ports = 0;
+	else
+		iobitmap.nr_ports = IO_BITMAP_BITS;
+
+	HYPERVISOR_physdev_op(PHYSDEVOP_set_iobitmap, &iobitmap);
+}
+#endif
+
 static void xen_io_delay(void)
 {
 }
@@ -1047,6 +1069,9 @@ static const struct pv_cpu_ops xen_cpu_ops __initconst = {
 	.write_idt_entry = xen_write_idt_entry,
 	.load_sp0 = xen_load_sp0,
 
+#ifdef CONFIG_X86_IOPL_IOPERM
+	.update_io_bitmap = xen_update_io_bitmap,
+#endif
 	.io_delay = xen_io_delay,
 
 	/* Xen takes care of %gs when switching to usermode for us */

^ permalink raw reply	[flat|nested] 539+ messages in thread

* Re: [GIT PULL] x86 fixes
  2020-01-31 11:52 Ingo Molnar
@ 2020-01-31 19:35 ` pr-tracker-bot
  0 siblings, 0 replies; 539+ messages in thread
From: pr-tracker-bot @ 2020-01-31 19:35 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Linus Torvalds, linux-kernel, Thomas Gleixner, Borislav Petkov,
	Peter Zijlstra, Andrew Morton

The pull request you sent on Fri, 31 Jan 2020 12:52:59 +0100:

> git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus

has been merged into torvalds/linux.git:
https://git.kernel.org/torvalds/c/b70a2d6b29f7c5b621bf83f903f26fee5fe28efc

Thank you!

-- 
Deet-doot-dot, I am a bot.
https://korg.wiki.kernel.org/userdoc/prtracker

^ permalink raw reply	[flat|nested] 539+ messages in thread

* [GIT PULL] x86 fixes
@ 2020-01-31 11:52 Ingo Molnar
  2020-01-31 19:35 ` pr-tracker-bot
  0 siblings, 1 reply; 539+ messages in thread
From: Ingo Molnar @ 2020-01-31 11:52 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Thomas Gleixner, Borislav Petkov, Peter Zijlstra,
	Andrew Morton

Linus,

Please pull the latest x86-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus

   # HEAD: 6bd3357b6181bd38c1a757168a8842e09ec6f3fb Merge branches 'x86/hyperv', 'x86/kdump' and 'x86/misc' into x86/urgent, to pick up single-commit branches

Misc fixes:

 - three fixes and a cleanup for the resctrl code,
 - a HyperV fix,
 - a fix to /proc/kcore contents in live debugging sessions,
 - a fix for the x86 decoder opcode map.

 Thanks,

	Ingo

------------------>
Masami Hiramatsu (1):
      x86/decoder: Add TEST opcode to Group3-2

Omar Sandoval (1):
      x86/crash: Define arch_crash_save_vmcoreinfo() if CONFIG_CRASH_CORE=y

Wei Liu (1):
      x86/hyper-v: Add "polling" bit to hv_synic_sint

Xiaochen Shen (4):
      x86/resctrl: Fix use-after-free when deleting resource groups
      x86/resctrl: Fix use-after-free due to inaccurate refcount of rdtgroup
      x86/resctrl: Fix a deadlock due to inaccurate reference
      x86/resctrl: Clean up unused function parameter in mkdir path


 arch/x86/include/asm/hyperv-tlfs.h     |  3 ++-
 arch/x86/kernel/Makefile               |  1 +
 arch/x86/kernel/cpu/resctrl/rdtgroup.c | 48 ++++++++++++++++++----------------
 arch/x86/kernel/crash_core_32.c        | 17 ++++++++++++
 arch/x86/kernel/crash_core_64.c        | 24 +++++++++++++++++
 arch/x86/kernel/machine_kexec_32.c     | 12 ---------
 arch/x86/kernel/machine_kexec_64.c     | 19 --------------
 arch/x86/lib/x86-opcode-map.txt        |  2 +-
 tools/arch/x86/lib/x86-opcode-map.txt  |  2 +-
 9 files changed, 71 insertions(+), 57 deletions(-)
 create mode 100644 arch/x86/kernel/crash_core_32.c
 create mode 100644 arch/x86/kernel/crash_core_64.c

^ permalink raw reply	[flat|nested] 539+ messages in thread

* Re: [GIT PULL] x86 fixes
  2020-01-18 18:52 Ingo Molnar
@ 2020-01-18 21:05 ` pr-tracker-bot
  0 siblings, 0 replies; 539+ messages in thread
From: pr-tracker-bot @ 2020-01-18 21:05 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Linus Torvalds, linux-kernel, Thomas Gleixner, Borislav Petkov,
	Peter Zijlstra, Andrew Morton

The pull request you sent on Sat, 18 Jan 2020 19:52:58 +0100:

> git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus

has been merged into torvalds/linux.git:
https://git.kernel.org/torvalds/c/0cc2682d8baaa6c7383e40153afc19239ad28d77

Thank you!

-- 
Deet-doot-dot, I am a bot.
https://korg.wiki.kernel.org/userdoc/prtracker

^ permalink raw reply	[flat|nested] 539+ messages in thread

* [GIT PULL] x86 fixes
@ 2020-01-18 18:52 Ingo Molnar
  2020-01-18 21:05 ` pr-tracker-bot
  0 siblings, 1 reply; 539+ messages in thread
From: Ingo Molnar @ 2020-01-18 18:52 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Thomas Gleixner, Borislav Petkov, Peter Zijlstra,
	Andrew Morton

Linus,

Please pull the latest x86-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus

   # HEAD: a006483b2f97af685f0e60f3a547c9ad4c9b9e94 x86/CPU/AMD: Ensure clearing of SME/SEV features is maintained

Misc fixes:

 - a resctrl fix for uninitialized objects found by debugobjects,

 - a resctrl memory leak fix,

 - fix the unintended re-enabling of the of SME and SEV CPU flags if 
   memory encryption was disabled at bootup via the MSR space.

 Thanks,

	Ingo

------------------>
Qian Cai (1):
      x86/resctrl: Fix an imbalance in domain_remove_cpu()

Shakeel Butt (1):
      x86/resctrl: Fix potential memory leak

Tom Lendacky (1):
      x86/CPU/AMD: Ensure clearing of SME/SEV features is maintained


 arch/x86/kernel/cpu/amd.c              | 4 ++--
 arch/x86/kernel/cpu/resctrl/core.c     | 2 +-
 arch/x86/kernel/cpu/resctrl/rdtgroup.c | 6 +++---
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 90f75e515876..62c30279be77 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -615,9 +615,9 @@ static void early_detect_mem_encrypt(struct cpuinfo_x86 *c)
 		return;
 
 clear_all:
-		clear_cpu_cap(c, X86_FEATURE_SME);
+		setup_clear_cpu_cap(X86_FEATURE_SME);
 clear_sev:
-		clear_cpu_cap(c, X86_FEATURE_SEV);
+		setup_clear_cpu_cap(X86_FEATURE_SEV);
 	}
 }
 
diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c
index 03eb90d00af0..89049b343c7a 100644
--- a/arch/x86/kernel/cpu/resctrl/core.c
+++ b/arch/x86/kernel/cpu/resctrl/core.c
@@ -618,7 +618,7 @@ static void domain_remove_cpu(int cpu, struct rdt_resource *r)
 		if (static_branch_unlikely(&rdt_mon_enable_key))
 			rmdir_mondata_subdir_allrdtgrp(r, d->id);
 		list_del(&d->list);
-		if (is_mbm_enabled())
+		if (r->mon_capable && is_mbm_enabled())
 			cancel_delayed_work(&d->mbm_over);
 		if (is_llc_occupancy_enabled() &&  has_busy_rmid(r, d)) {
 			/*
diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
index 2e3b06d6bbc6..dac7209a0708 100644
--- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
+++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
@@ -1741,9 +1741,6 @@ static int set_cache_qos_cfg(int level, bool enable)
 	struct rdt_domain *d;
 	int cpu;
 
-	if (!zalloc_cpumask_var(&cpu_mask, GFP_KERNEL))
-		return -ENOMEM;
-
 	if (level == RDT_RESOURCE_L3)
 		update = l3_qos_cfg_update;
 	else if (level == RDT_RESOURCE_L2)
@@ -1751,6 +1748,9 @@ static int set_cache_qos_cfg(int level, bool enable)
 	else
 		return -EINVAL;
 
+	if (!zalloc_cpumask_var(&cpu_mask, GFP_KERNEL))
+		return -ENOMEM;
+
 	r_l = &rdt_resources_all[level];
 	list_for_each_entry(d, &r_l->domains, list) {
 		/* Pick one CPU from each domain instance to update MSR */

^ permalink raw reply	[flat|nested] 539+ messages in thread

* Re: [GIT PULL] x86 fixes
  2019-12-01 22:22 Ingo Molnar
@ 2019-12-02  4:40 ` pr-tracker-bot
  0 siblings, 0 replies; 539+ messages in thread
From: pr-tracker-bot @ 2019-12-02  4:40 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Linus Torvalds, linux-kernel, Borislav Petkov, Peter Zijlstra,
	Thomas Gleixner, Andrew Morton, Andy Lutomirski

The pull request you sent on Sun, 1 Dec 2019 23:22:08 +0100:

> git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus

has been merged into torvalds/linux.git:
https://git.kernel.org/torvalds/c/e5b3fc125d768eacd73bb4dc5019f0ce95635af4

Thank you!

-- 
Deet-doot-dot, I am a bot.
https://korg.wiki.kernel.org/userdoc/prtracker

^ permalink raw reply	[flat|nested] 539+ messages in thread

* [GIT PULL] x86 fixes
@ 2019-12-01 22:22 Ingo Molnar
  2019-12-02  4:40 ` pr-tracker-bot
  0 siblings, 1 reply; 539+ messages in thread
From: Ingo Molnar @ 2019-12-01 22:22 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Borislav Petkov, Peter Zijlstra, Thomas Gleixner,
	Andrew Morton, Andy Lutomirski

Linus,

Please pull the latest x86-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus

   # HEAD: 91298f1a302dad0f0f630413c812818636faa8a0 x86/mm/pat: Fix off-by-one bugs in interval tree search

Various fixes:

 - Fix the PAT performance regression that downgraded write-combining 
   device memory regions to uncached.

 - There's been a number of bugs in 32-bit double fault handling - 
   hopefully all fixed now.

 - Fix an LDT crash

 - Fix an FPU over-optimization that broke with GCC9 code optimizations.

 - Misc cleanups

 Thanks,

	Ingo

------------------>
Andy Lutomirski (9):
      selftests/x86/single_step_syscall: Check SYSENTER directly
      lkdtm: Add a DOUBLE_FAULT crash type on x86
      x86/traps: Disentangle the 32-bit and 64-bit doublefault code
      x86/doublefault/32: Rename doublefault.c to doublefault_32.c
      x86/doublefault/32: Move #DF stack and TSS to cpu_entry_area
      x86/doublefault/32: Rewrite the x86_32 #DF handler and unify with 64-bit
      x86/traps: die() instead of panicking on a double fault
      x86/ptrace: Remove set_segment_reg() implementations for current
      x86/ptrace: Document FSBASE and GSBASE ABI oddities

Borislav Petkov (2):
      x86/entry/32: Remove unused 'restore_all_notrace' local label
      x86/ioperm: Save an indentation level in tss_update_io_bitmap()

Ingo Molnar (1):
      x86/mm/pat: Fix off-by-one bugs in interval tree search

Joerg Roedel (1):
      x86/mm/32: Sync only to VMALLOC_END in vmalloc_sync_all()

Sebastian Andrzej Siewior (1):
      x86/fpu: Don't cache access to fpu_fpregs_owner_ctx


 arch/x86/Kconfig.debug                            |   2 +-
 arch/x86/entry/entry_32.S                         |  43 ++++++-
 arch/x86/include/asm/cpu_entry_area.h             |  12 ++
 arch/x86/include/asm/doublefault.h                |  13 +++
 arch/x86/include/asm/fpu/internal.h               |   2 +-
 arch/x86/include/asm/pgtable_32_types.h           |   7 +-
 arch/x86/include/asm/processor.h                  |   2 -
 arch/x86/include/asm/traps.h                      |   3 +
 arch/x86/kernel/Makefile                          |   4 +-
 arch/x86/kernel/cpu/common.c                      |  12 +-
 arch/x86/kernel/doublefault.c                     |  86 --------------
 arch/x86/kernel/doublefault_32.c                  | 136 ++++++++++++++++++++++
 arch/x86/kernel/dumpstack_32.c                    |  30 +++++
 arch/x86/kernel/process.c                         |  52 ++++-----
 arch/x86/kernel/ptrace.c                          |  36 ++++--
 arch/x86/kernel/traps.c                           |  31 +++--
 arch/x86/mm/cpu_entry_area.c                      |  14 ++-
 arch/x86/mm/fault.c                               |   2 +-
 arch/x86/mm/pat_interval.c                        |  12 +-
 drivers/misc/lkdtm/bugs.c                         |  39 +++++++
 drivers/misc/lkdtm/core.c                         |   3 +
 drivers/misc/lkdtm/lkdtm.h                        |   3 +
 tools/testing/selftests/x86/single_step_syscall.c |  94 +++++++++++++--
 23 files changed, 467 insertions(+), 171 deletions(-)
 create mode 100644 arch/x86/include/asm/doublefault.h
 delete mode 100644 arch/x86/kernel/doublefault.c
 create mode 100644 arch/x86/kernel/doublefault_32.c


^ permalink raw reply	[flat|nested] 539+ messages in thread

* Re: [GIT PULL] x86 fixes
  2019-11-16 21:42 Ingo Molnar
@ 2019-11-17  0:35 ` pr-tracker-bot
  0 siblings, 0 replies; 539+ messages in thread
From: pr-tracker-bot @ 2019-11-17  0:35 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Linus Torvalds, linux-kernel, Thomas Gleixner, Borislav Petkov,
	Peter Zijlstra, Andrew Morton

The pull request you sent on Sat, 16 Nov 2019 22:42:43 +0100:

> git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus

has been merged into torvalds/linux.git:
https://git.kernel.org/torvalds/c/fe30021c36fbfb71d6ff25a424342149e58bba52

Thank you!

-- 
Deet-doot-dot, I am a bot.
https://korg.wiki.kernel.org/userdoc/prtracker

^ permalink raw reply	[flat|nested] 539+ messages in thread

* [GIT PULL] x86 fixes
@ 2019-11-16 21:42 Ingo Molnar
  2019-11-17  0:35 ` pr-tracker-bot
  0 siblings, 1 reply; 539+ messages in thread
From: Ingo Molnar @ 2019-11-16 21:42 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Thomas Gleixner, Borislav Petkov, Peter Zijlstra,
	Andrew Morton

Linus,

Please pull the latest x86-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus

   # HEAD: c8eafe1495303bfd0eedaa8156b1ee9082ee9642 x86/resctrl: Fix potential lockdep warning

Two fixes: disable unreliable HPET on Intel Coffe Lake platforms, and fix 
a lockdep splat in the resctrl code.

 Thanks,

	Ingo

------------------>
Kai-Heng Feng (1):
      x86/quirks: Disable HPET on Intel Coffe Lake platforms

Xiaochen Shen (1):
      x86/resctrl: Fix potential lockdep warning


 arch/x86/kernel/cpu/resctrl/rdtgroup.c | 4 ----
 arch/x86/kernel/early-quirks.c         | 2 ++
 2 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
index a46dee8e78db..2e3b06d6bbc6 100644
--- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
+++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
@@ -461,10 +461,8 @@ static ssize_t rdtgroup_cpus_write(struct kernfs_open_file *of,
 	}
 
 	rdtgrp = rdtgroup_kn_lock_live(of->kn);
-	rdt_last_cmd_clear();
 	if (!rdtgrp) {
 		ret = -ENOENT;
-		rdt_last_cmd_puts("Directory was removed\n");
 		goto unlock;
 	}
 
@@ -2648,10 +2646,8 @@ static int mkdir_rdt_prepare(struct kernfs_node *parent_kn,
 	int ret;
 
 	prdtgrp = rdtgroup_kn_lock_live(prgrp_kn);
-	rdt_last_cmd_clear();
 	if (!prdtgrp) {
 		ret = -ENODEV;
-		rdt_last_cmd_puts("Directory was removed\n");
 		goto out_unlock;
 	}
 
diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c
index 6f6b1d04dadf..4cba91ec8049 100644
--- a/arch/x86/kernel/early-quirks.c
+++ b/arch/x86/kernel/early-quirks.c
@@ -710,6 +710,8 @@ static struct chipset early_qrk[] __initdata = {
 	 */
 	{ PCI_VENDOR_ID_INTEL, 0x0f00,
 		PCI_CLASS_BRIDGE_HOST, PCI_ANY_ID, 0, force_disable_hpet},
+	{ PCI_VENDOR_ID_INTEL, 0x3ec4,
+		PCI_CLASS_BRIDGE_HOST, PCI_ANY_ID, 0, force_disable_hpet},
 	{ PCI_VENDOR_ID_BROADCOM, 0x4331,
 	  PCI_CLASS_NETWORK_OTHER, PCI_ANY_ID, 0, apple_airport_reset},
 	{}

^ permalink raw reply	[flat|nested] 539+ messages in thread

* Re: [GIT PULL] x86 fixes
  2019-10-12 13:19 Ingo Molnar
@ 2019-10-12 22:35 ` pr-tracker-bot
  0 siblings, 0 replies; 539+ messages in thread
From: pr-tracker-bot @ 2019-10-12 22:35 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Linus Torvalds, linux-kernel, Thomas Gleixner, Borislav Petkov,
	Peter Zijlstra, Andrew Morton

The pull request you sent on Sat, 12 Oct 2019 15:19:16 +0200:

> git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus

has been merged into torvalds/linux.git:
https://git.kernel.org/torvalds/c/7a275fd7b9519b5cc63270a8964055aadb04de26

Thank you!

-- 
Deet-doot-dot, I am a bot.
https://korg.wiki.kernel.org/userdoc/prtracker

^ permalink raw reply	[flat|nested] 539+ messages in thread

* [GIT PULL] x86 fixes
@ 2019-10-12 13:19 Ingo Molnar
  2019-10-12 22:35 ` pr-tracker-bot
  0 siblings, 1 reply; 539+ messages in thread
From: Ingo Molnar @ 2019-10-12 13:19 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Thomas Gleixner, Borislav Petkov, Peter Zijlstra,
	Andrew Morton

Linus,

Please pull the latest x86-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus

   # HEAD: 8d7c6ac3b2371eb1cbc9925a88f4d10efff374de x86/cpu: Add Comet Lake to the Intel CPU models header

A handful of fixes: a kexec linking fix, an AMD MWAITX fix, a vmware 
guest support fix when built under Clang, and new CPU model number 
definitions.


  out-of-topic modifications in x86-urgent-for-linus:
  -----------------------------------------------------
  include/linux/string.h             # bec500777089: lib/string: Make memzero_exp
  lib/string.c                       # bec500777089: lib/string: Make memzero_exp

 Thanks,

	Ingo

------------------>
Arvind Sankar (1):
      lib/string: Make memzero_explicit() inline instead of external

Janakarajan Natarajan (1):
      x86/asm: Fix MWAITX C-state hint value

Kan Liang (1):
      x86/cpu: Add Comet Lake to the Intel CPU models header

Sami Tolvanen (1):
      x86/cpu/vmware: Use the full form of INL in VMWARE_PORT


 arch/x86/include/asm/intel-family.h |  3 +++
 arch/x86/include/asm/mwait.h        |  2 +-
 arch/x86/kernel/cpu/vmware.c        |  2 +-
 arch/x86/lib/delay.c                |  4 ++--
 include/linux/string.h              | 21 ++++++++++++++++++++-
 lib/string.c                        | 21 ---------------------
 6 files changed, 27 insertions(+), 26 deletions(-)

diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h
index f04622500da3..c606c0b70738 100644
--- a/arch/x86/include/asm/intel-family.h
+++ b/arch/x86/include/asm/intel-family.h
@@ -83,6 +83,9 @@
 #define INTEL_FAM6_TIGERLAKE_L		0x8C
 #define INTEL_FAM6_TIGERLAKE		0x8D
 
+#define INTEL_FAM6_COMETLAKE		0xA5
+#define INTEL_FAM6_COMETLAKE_L		0xA6
+
 /* "Small Core" Processors (Atom) */
 
 #define INTEL_FAM6_ATOM_BONNELL		0x1C /* Diamondville, Pineview */
diff --git a/arch/x86/include/asm/mwait.h b/arch/x86/include/asm/mwait.h
index e28f8b723b5c..9d5252c9685c 100644
--- a/arch/x86/include/asm/mwait.h
+++ b/arch/x86/include/asm/mwait.h
@@ -21,7 +21,7 @@
 #define MWAIT_ECX_INTERRUPT_BREAK	0x1
 #define MWAITX_ECX_TIMER_ENABLE		BIT(1)
 #define MWAITX_MAX_LOOPS		((u32)-1)
-#define MWAITX_DISABLE_CSTATES		0xf
+#define MWAITX_DISABLE_CSTATES		0xf0
 
 static inline void __monitor(const void *eax, unsigned long ecx,
 			     unsigned long edx)
diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c
index 9735139cfdf8..46d732696c1c 100644
--- a/arch/x86/kernel/cpu/vmware.c
+++ b/arch/x86/kernel/cpu/vmware.c
@@ -49,7 +49,7 @@
 #define VMWARE_CMD_VCPU_RESERVED 31
 
 #define VMWARE_PORT(cmd, eax, ebx, ecx, edx)				\
-	__asm__("inl (%%dx)" :						\
+	__asm__("inl (%%dx), %%eax" :					\
 		"=a"(eax), "=c"(ecx), "=d"(edx), "=b"(ebx) :		\
 		"a"(VMWARE_HYPERVISOR_MAGIC),				\
 		"c"(VMWARE_CMD_##cmd),					\
diff --git a/arch/x86/lib/delay.c b/arch/x86/lib/delay.c
index b7375dc6898f..c126571e5e2e 100644
--- a/arch/x86/lib/delay.c
+++ b/arch/x86/lib/delay.c
@@ -113,8 +113,8 @@ static void delay_mwaitx(unsigned long __loops)
 		__monitorx(raw_cpu_ptr(&cpu_tss_rw), 0, 0);
 
 		/*
-		 * AMD, like Intel, supports the EAX hint and EAX=0xf
-		 * means, do not enter any deep C-state and we use it
+		 * AMD, like Intel's MWAIT version, supports the EAX hint and
+		 * EAX=0xf0 means, do not enter any deep C-state and we use it
 		 * here in delay() to minimize wakeup latency.
 		 */
 		__mwaitx(MWAITX_DISABLE_CSTATES, delay, MWAITX_ECX_TIMER_ENABLE);
diff --git a/include/linux/string.h b/include/linux/string.h
index b2f9df7f0761..b6ccdc2c7f02 100644
--- a/include/linux/string.h
+++ b/include/linux/string.h
@@ -227,7 +227,26 @@ static inline bool strstarts(const char *str, const char *prefix)
 }
 
 size_t memweight(const void *ptr, size_t bytes);
-void memzero_explicit(void *s, size_t count);
+
+/**
+ * memzero_explicit - Fill a region of memory (e.g. sensitive
+ *		      keying data) with 0s.
+ * @s: Pointer to the start of the area.
+ * @count: The size of the area.
+ *
+ * Note: usually using memset() is just fine (!), but in cases
+ * where clearing out _local_ data at the end of a scope is
+ * necessary, memzero_explicit() should be used instead in
+ * order to prevent the compiler from optimising away zeroing.
+ *
+ * memzero_explicit() doesn't need an arch-specific version as
+ * it just invokes the one of memset() implicitly.
+ */
+static inline void memzero_explicit(void *s, size_t count)
+{
+	memset(s, 0, count);
+	barrier_data(s);
+}
 
 /**
  * kbasename - return the last part of a pathname.
diff --git a/lib/string.c b/lib/string.c
index cd7a10c19210..08ec58cc673b 100644
--- a/lib/string.c
+++ b/lib/string.c
@@ -748,27 +748,6 @@ void *memset(void *s, int c, size_t count)
 EXPORT_SYMBOL(memset);
 #endif
 
-/**
- * memzero_explicit - Fill a region of memory (e.g. sensitive
- *		      keying data) with 0s.
- * @s: Pointer to the start of the area.
- * @count: The size of the area.
- *
- * Note: usually using memset() is just fine (!), but in cases
- * where clearing out _local_ data at the end of a scope is
- * necessary, memzero_explicit() should be used instead in
- * order to prevent the compiler from optimising away zeroing.
- *
- * memzero_explicit() doesn't need an arch-specific version as
- * it just invokes the one of memset() implicitly.
- */
-void memzero_explicit(void *s, size_t count)
-{
-	memset(s, 0, count);
-	barrier_data(s);
-}
-EXPORT_SYMBOL(memzero_explicit);
-
 #ifndef __HAVE_ARCH_MEMSET16
 /**
  * memset16() - Fill a memory area with a uint16_t

^ permalink raw reply	[flat|nested] 539+ messages in thread

* Re: [GIT PULL] x86 fixes
  2019-09-12 12:57 Ingo Molnar
@ 2019-09-12 14:05 ` pr-tracker-bot
  0 siblings, 0 replies; 539+ messages in thread
From: pr-tracker-bot @ 2019-09-12 14:05 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Linus Torvalds, linux-kernel, Thomas Gleixner, Borislav Petkov,
	Peter Zijlstra, Andrew Morton

The pull request you sent on Thu, 12 Sep 2019 14:57:10 +0200:

> git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus

has been merged into torvalds/linux.git:
https://git.kernel.org/torvalds/c/95217783b7f6f331e7a6675e0a31fb9a5a1b9a36

Thank you!

-- 
Deet-doot-dot, I am a bot.
https://korg.wiki.kernel.org/userdoc/prtracker

^ permalink raw reply	[flat|nested] 539+ messages in thread

* [GIT PULL] x86 fixes
@ 2019-09-12 12:57 Ingo Molnar
  2019-09-12 14:05 ` pr-tracker-bot
  0 siblings, 1 reply; 539+ messages in thread
From: Ingo Molnar @ 2019-09-12 12:57 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Thomas Gleixner, Borislav Petkov, Peter Zijlstra,
	Andrew Morton

Linus,

Please pull the latest x86-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus

   # HEAD: afa8b475c1aec185a8e106c48b3832e0b88bc2de x86/timer: Force PIT initialization when !X86_FEATURE_ARAT

A KVM guest fix, and a kdump kernel relocation errors fix.

 Thanks,

	Ingo

------------------>
Jan Stancek (1):
      x86/timer: Force PIT initialization when !X86_FEATURE_ARAT

Steve Wahl (1):
      x86/purgatory: Change compiler flags from -mcmodel=kernel to -mcmodel=large to fix kexec relocation errors


 arch/x86/kernel/apic/apic.c |  4 ++++
 arch/x86/purgatory/Makefile | 35 +++++++++++++++++++----------------
 2 files changed, 23 insertions(+), 16 deletions(-)

diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index dba2828b779a..f91b3ff9dc03 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -834,6 +834,10 @@ bool __init apic_needs_pit(void)
 	if (!boot_cpu_has(X86_FEATURE_APIC))
 		return true;
 
+	/* Virt guests may lack ARAT, but still have DEADLINE */
+	if (!boot_cpu_has(X86_FEATURE_ARAT))
+		return true;
+
 	/* Deadline timer is based on TSC so no further PIT action required */
 	if (boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER))
 		return false;
diff --git a/arch/x86/purgatory/Makefile b/arch/x86/purgatory/Makefile
index 8901a1f89cf5..10fb42da0007 100644
--- a/arch/x86/purgatory/Makefile
+++ b/arch/x86/purgatory/Makefile
@@ -18,37 +18,40 @@ targets += purgatory.ro
 KASAN_SANITIZE	:= n
 KCOV_INSTRUMENT := n
 
+# These are adjustments to the compiler flags used for objects that
+# make up the standalone purgatory.ro
+
+PURGATORY_CFLAGS_REMOVE := -mcmodel=kernel
+PURGATORY_CFLAGS := -mcmodel=large -ffreestanding -fno-zero-initialized-in-bss
+
 # Default KBUILD_CFLAGS can have -pg option set when FTRACE is enabled. That
 # in turn leaves some undefined symbols like __fentry__ in purgatory and not
 # sure how to relocate those.
 ifdef CONFIG_FUNCTION_TRACER
-CFLAGS_REMOVE_sha256.o		+= $(CC_FLAGS_FTRACE)
-CFLAGS_REMOVE_purgatory.o	+= $(CC_FLAGS_FTRACE)
-CFLAGS_REMOVE_string.o		+= $(CC_FLAGS_FTRACE)
-CFLAGS_REMOVE_kexec-purgatory.o	+= $(CC_FLAGS_FTRACE)
+PURGATORY_CFLAGS_REMOVE		+= $(CC_FLAGS_FTRACE)
 endif
 
 ifdef CONFIG_STACKPROTECTOR
-CFLAGS_REMOVE_sha256.o		+= -fstack-protector
-CFLAGS_REMOVE_purgatory.o	+= -fstack-protector
-CFLAGS_REMOVE_string.o		+= -fstack-protector
-CFLAGS_REMOVE_kexec-purgatory.o	+= -fstack-protector
+PURGATORY_CFLAGS_REMOVE		+= -fstack-protector
 endif
 
 ifdef CONFIG_STACKPROTECTOR_STRONG
-CFLAGS_REMOVE_sha256.o		+= -fstack-protector-strong
-CFLAGS_REMOVE_purgatory.o	+= -fstack-protector-strong
-CFLAGS_REMOVE_string.o		+= -fstack-protector-strong
-CFLAGS_REMOVE_kexec-purgatory.o	+= -fstack-protector-strong
+PURGATORY_CFLAGS_REMOVE		+= -fstack-protector-strong
 endif
 
 ifdef CONFIG_RETPOLINE
-CFLAGS_REMOVE_sha256.o		+= $(RETPOLINE_CFLAGS)
-CFLAGS_REMOVE_purgatory.o	+= $(RETPOLINE_CFLAGS)
-CFLAGS_REMOVE_string.o		+= $(RETPOLINE_CFLAGS)
-CFLAGS_REMOVE_kexec-purgatory.o	+= $(RETPOLINE_CFLAGS)
+PURGATORY_CFLAGS_REMOVE		+= $(RETPOLINE_CFLAGS)
 endif
 
+CFLAGS_REMOVE_purgatory.o	+= $(PURGATORY_CFLAGS_REMOVE)
+CFLAGS_purgatory.o		+= $(PURGATORY_CFLAGS)
+
+CFLAGS_REMOVE_sha256.o		+= $(PURGATORY_CFLAGS_REMOVE)
+CFLAGS_sha256.o			+= $(PURGATORY_CFLAGS)
+
+CFLAGS_REMOVE_string.o		+= $(PURGATORY_CFLAGS_REMOVE)
+CFLAGS_string.o			+= $(PURGATORY_CFLAGS)
+
 $(obj)/purgatory.ro: $(PURGATORY_OBJS) FORCE
 		$(call if_changed,ld)
 

^ permalink raw reply	[flat|nested] 539+ messages in thread

* Re: [GIT PULL] x86 fixes
  2019-09-05  8:07 Ingo Molnar
@ 2019-09-05 21:15 ` pr-tracker-bot
  0 siblings, 0 replies; 539+ messages in thread
From: pr-tracker-bot @ 2019-09-05 21:15 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Linus Torvalds, linux-kernel, Thomas Gleixner, Borislav Petkov,
	Peter Zijlstra, Andrew Morton

The pull request you sent on Thu, 5 Sep 2019 10:07:19 +0200:

> git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus

has been merged into torvalds/linux.git:
https://git.kernel.org/torvalds/c/19e4147a04a43d210dbacda76e0988f90bb0ba45

Thank you!

-- 
Deet-doot-dot, I am a bot.
https://korg.wiki.kernel.org/userdoc/prtracker

^ permalink raw reply	[flat|nested] 539+ messages in thread

* [GIT PULL] x86 fixes
@ 2019-09-05  8:07 Ingo Molnar
  2019-09-05 21:15 ` pr-tracker-bot
  0 siblings, 1 reply; 539+ messages in thread
From: Ingo Molnar @ 2019-09-05  8:07 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Thomas Gleixner, Borislav Petkov, Peter Zijlstra,
	Andrew Morton

Linus,

Please pull the latest x86-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus

   # HEAD: 4030b4c585c41eeefec7bd20ce3d0e100a0f2e4d x86/hyper-v: Fix overflow bug in fill_gva_list()

Misc fixes:

 - a signed kernels EFI boot fix,
 - an UBSAN related AC flags fix,
 - and a Hyper-V infinite loop fix.

 Thanks,

	Ingo

------------------>
John S. Gruber (1):
      x86/boot: Preserve boot_params.secure_boot from sanitizing

Peter Zijlstra (1):
      x86/uaccess: Don't leak the AC flags into __get_user() argument evaluation

Tianyu Lan (1):
      x86/hyper-v: Fix overflow bug in fill_gva_list()


 arch/x86/hyperv/mmu.c                  | 8 +++++---
 arch/x86/include/asm/bootparam_utils.h | 1 +
 arch/x86/include/asm/uaccess.h         | 4 +++-
 3 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/arch/x86/hyperv/mmu.c b/arch/x86/hyperv/mmu.c
index e65d7fe6489f..5208ba49c89a 100644
--- a/arch/x86/hyperv/mmu.c
+++ b/arch/x86/hyperv/mmu.c
@@ -37,12 +37,14 @@ static inline int fill_gva_list(u64 gva_list[], int offset,
 		 * Lower 12 bits encode the number of additional
 		 * pages to flush (in addition to the 'cur' page).
 		 */
-		if (diff >= HV_TLB_FLUSH_UNIT)
+		if (diff >= HV_TLB_FLUSH_UNIT) {
 			gva_list[gva_n] |= ~PAGE_MASK;
-		else if (diff)
+			cur += HV_TLB_FLUSH_UNIT;
+		}  else if (diff) {
 			gva_list[gva_n] |= (diff - 1) >> PAGE_SHIFT;
+			cur = end;
+		}
 
-		cur += HV_TLB_FLUSH_UNIT;
 		gva_n++;
 
 	} while (cur < end);
diff --git a/arch/x86/include/asm/bootparam_utils.h b/arch/x86/include/asm/bootparam_utils.h
index 9e5f3c722c33..981fe923a59f 100644
--- a/arch/x86/include/asm/bootparam_utils.h
+++ b/arch/x86/include/asm/bootparam_utils.h
@@ -70,6 +70,7 @@ static void sanitize_boot_params(struct boot_params *boot_params)
 			BOOT_PARAM_PRESERVE(eddbuf_entries),
 			BOOT_PARAM_PRESERVE(edd_mbr_sig_buf_entries),
 			BOOT_PARAM_PRESERVE(edd_mbr_sig_buffer),
+			BOOT_PARAM_PRESERVE(secure_boot),
 			BOOT_PARAM_PRESERVE(hdr),
 			BOOT_PARAM_PRESERVE(e820_table),
 			BOOT_PARAM_PRESERVE(eddbuf),
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index 9c4435307ff8..35c225ede0e4 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -444,8 +444,10 @@ __pu_label:							\
 ({									\
 	int __gu_err;							\
 	__inttype(*(ptr)) __gu_val;					\
+	__typeof__(ptr) __gu_ptr = (ptr);				\
+	__typeof__(size) __gu_size = (size);				\
 	__uaccess_begin_nospec();					\
-	__get_user_size(__gu_val, (ptr), (size), __gu_err, -EFAULT);	\
+	__get_user_size(__gu_val, __gu_ptr, __gu_size, __gu_err, -EFAULT);	\
 	__uaccess_end();						\
 	(x) = (__force __typeof__(*(ptr)))__gu_val;			\
 	__builtin_expect(__gu_err, 0);					\

^ permalink raw reply	[flat|nested] 539+ messages in thread

* Re: [GIT PULL] x86 fixes
  2019-06-29  9:14 Ingo Molnar
@ 2019-06-29 11:45 ` pr-tracker-bot
  0 siblings, 0 replies; 539+ messages in thread
From: pr-tracker-bot @ 2019-06-29 11:45 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Linus Torvalds, linux-kernel, Thomas Gleixner, Borislav Petkov,
	Peter Zijlstra, Andrew Morton

The pull request you sent on Sat, 29 Jun 2019 11:14:07 +0200:

> git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus

has been merged into torvalds/linux.git:
https://git.kernel.org/torvalds/c/728254541ebcc7fee869c3c4c3f36f96be791edb

Thank you!

-- 
Deet-doot-dot, I am a bot.
https://korg.wiki.kernel.org/userdoc/prtracker

^ permalink raw reply	[flat|nested] 539+ messages in thread

* [GIT PULL] x86 fixes
@ 2019-06-29  9:14 Ingo Molnar
  2019-06-29 11:45 ` pr-tracker-bot
  0 siblings, 1 reply; 539+ messages in thread
From: Ingo Molnar @ 2019-06-29  9:14 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Thomas Gleixner, Borislav Petkov, Peter Zijlstra,
	Andrew Morton

Linus,

Please pull the latest x86-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus

   # HEAD: ae6a45a0868986f69039a2150d3b2b9ca294c378 x86/unwind/orc: Fall back to using frame pointers for generated code

Misc fixes all over the place:

- might_sleep() atomicity fix in the microcode loader
- resctrl boundary condition fix
- APIC arithmethics bug fix for frequencies >= 4.2 GHz
- three 5-level paging crash fixes
- two speculation fixes
- a perf/stacktrace fix

 Thanks,

	Ingo

------------------>
Alejandro Jimenez (1):
      x86/speculation: Allow guests to use SSBD even if host does not

Colin Ian King (1):
      x86/apic: Fix integer overflow on 10 bit left shift of cpu_khz

Josh Poimboeuf (1):
      x86/unwind/orc: Fall back to using frame pointers for generated code

Kirill A. Shutemov (3):
      x86/boot/64: Fix crash if kernel image crosses page table boundary
      x86/boot/64: Add missing fixup_pointer() for next_early_pgt access
      x86/mm: Handle physical-virtual alignment mismatch in phys_p4d_init()

Reinette Chatre (1):
      x86/resctrl: Prevent possible overrun during bitmap operations

Song Liu (1):
      perf/x86: Always store regs->ip in perf_callchain_kernel()

Thomas Gleixner (1):
      x86/microcode: Fix the microcode load on CPU hotplug for real


 arch/x86/events/core.c                 | 10 +++++-----
 arch/x86/kernel/apic/apic.c            |  3 ++-
 arch/x86/kernel/cpu/bugs.c             | 11 ++++++++++-
 arch/x86/kernel/cpu/microcode/core.c   | 15 ++++++++++-----
 arch/x86/kernel/cpu/resctrl/rdtgroup.c | 35 ++++++++++++++++------------------
 arch/x86/kernel/head64.c               | 20 ++++++++++---------
 arch/x86/kernel/unwind_orc.c           | 26 +++++++++++++++++++++----
 arch/x86/mm/init_64.c                  | 24 ++++++++++++-----------
 8 files changed, 89 insertions(+), 55 deletions(-)

diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index f315425d8468..4fb3ca1e699d 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -2402,13 +2402,13 @@ perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *re
 		return;
 	}
 
-	if (perf_hw_regs(regs)) {
-		if (perf_callchain_store(entry, regs->ip))
-			return;
+	if (perf_callchain_store(entry, regs->ip))
+		return;
+
+	if (perf_hw_regs(regs))
 		unwind_start(&state, current, regs, NULL);
-	} else {
+	else
 		unwind_start(&state, current, NULL, (void *)regs->sp);
-	}
 
 	for (; !unwind_done(&state); unwind_next_frame(&state)) {
 		addr = unwind_get_return_address(&state);
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 177aa8ef2afa..85be316665b4 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -1464,7 +1464,8 @@ static void apic_pending_intr_clear(void)
 		if (queued) {
 			if (boot_cpu_has(X86_FEATURE_TSC) && cpu_khz) {
 				ntsc = rdtsc();
-				max_loops = (cpu_khz << 10) - (ntsc - tsc);
+				max_loops = (long long)cpu_khz << 10;
+				max_loops -= ntsc - tsc;
 			} else {
 				max_loops--;
 			}
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 03b4cc0ec3a7..66ca906aa790 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -835,6 +835,16 @@ static enum ssb_mitigation __init __ssb_select_mitigation(void)
 		break;
 	}
 
+	/*
+	 * If SSBD is controlled by the SPEC_CTRL MSR, then set the proper
+	 * bit in the mask to allow guests to use the mitigation even in the
+	 * case where the host does not enable it.
+	 */
+	if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD) ||
+	    static_cpu_has(X86_FEATURE_AMD_SSBD)) {
+		x86_spec_ctrl_mask |= SPEC_CTRL_SSBD;
+	}
+
 	/*
 	 * We have three CPU feature flags that are in play here:
 	 *  - X86_BUG_SPEC_STORE_BYPASS - CPU is susceptible.
@@ -852,7 +862,6 @@ static enum ssb_mitigation __init __ssb_select_mitigation(void)
 			x86_amd_ssb_disable();
 		} else {
 			x86_spec_ctrl_base |= SPEC_CTRL_SSBD;
-			x86_spec_ctrl_mask |= SPEC_CTRL_SSBD;
 			wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base);
 		}
 	}
diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c
index a813987b5552..cb0fdcaf1415 100644
--- a/arch/x86/kernel/cpu/microcode/core.c
+++ b/arch/x86/kernel/cpu/microcode/core.c
@@ -789,13 +789,16 @@ static struct syscore_ops mc_syscore_ops = {
 	.resume			= mc_bp_resume,
 };
 
-static int mc_cpu_online(unsigned int cpu)
+static int mc_cpu_starting(unsigned int cpu)
 {
-	struct device *dev;
-
-	dev = get_cpu_device(cpu);
 	microcode_update_cpu(cpu);
 	pr_debug("CPU%d added\n", cpu);
+	return 0;
+}
+
+static int mc_cpu_online(unsigned int cpu)
+{
+	struct device *dev = get_cpu_device(cpu);
 
 	if (sysfs_create_group(&dev->kobj, &mc_attr_group))
 		pr_err("Failed to create group for CPU%d\n", cpu);
@@ -872,7 +875,9 @@ int __init microcode_init(void)
 		goto out_ucode_group;
 
 	register_syscore_ops(&mc_syscore_ops);
-	cpuhp_setup_state_nocalls(CPUHP_AP_MICROCODE_LOADER, "x86/microcode:online",
+	cpuhp_setup_state_nocalls(CPUHP_AP_MICROCODE_LOADER, "x86/microcode:starting",
+				  mc_cpu_starting, NULL);
+	cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "x86/microcode:online",
 				  mc_cpu_online, mc_cpu_down_prep);
 
 	pr_info("Microcode Update Driver: v%s.", DRIVER_VERSION);
diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
index 869cbef5da81..f9d8ed6ab03b 100644
--- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
+++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
@@ -804,8 +804,12 @@ static int rdt_bit_usage_show(struct kernfs_open_file *of,
 			      struct seq_file *seq, void *v)
 {
 	struct rdt_resource *r = of->kn->parent->priv;
-	u32 sw_shareable = 0, hw_shareable = 0;
-	u32 exclusive = 0, pseudo_locked = 0;
+	/*
+	 * Use unsigned long even though only 32 bits are used to ensure
+	 * test_bit() is used safely.
+	 */
+	unsigned long sw_shareable = 0, hw_shareable = 0;
+	unsigned long exclusive = 0, pseudo_locked = 0;
 	struct rdt_domain *dom;
 	int i, hwb, swb, excl, psl;
 	enum rdtgrp_mode mode;
@@ -850,10 +854,10 @@ static int rdt_bit_usage_show(struct kernfs_open_file *of,
 		}
 		for (i = r->cache.cbm_len - 1; i >= 0; i--) {
 			pseudo_locked = dom->plr ? dom->plr->cbm : 0;
-			hwb = test_bit(i, (unsigned long *)&hw_shareable);
-			swb = test_bit(i, (unsigned long *)&sw_shareable);
-			excl = test_bit(i, (unsigned long *)&exclusive);
-			psl = test_bit(i, (unsigned long *)&pseudo_locked);
+			hwb = test_bit(i, &hw_shareable);
+			swb = test_bit(i, &sw_shareable);
+			excl = test_bit(i, &exclusive);
+			psl = test_bit(i, &pseudo_locked);
 			if (hwb && swb)
 				seq_putc(seq, 'X');
 			else if (hwb && !swb)
@@ -2494,26 +2498,19 @@ static int mkdir_mondata_all(struct kernfs_node *parent_kn,
  */
 static void cbm_ensure_valid(u32 *_val, struct rdt_resource *r)
 {
-	/*
-	 * Convert the u32 _val to an unsigned long required by all the bit
-	 * operations within this function. No more than 32 bits of this
-	 * converted value can be accessed because all bit operations are
-	 * additionally provided with cbm_len that is initialized during
-	 * hardware enumeration using five bits from the EAX register and
-	 * thus never can exceed 32 bits.
-	 */
-	unsigned long *val = (unsigned long *)_val;
+	unsigned long val = *_val;
 	unsigned int cbm_len = r->cache.cbm_len;
 	unsigned long first_bit, zero_bit;
 
-	if (*val == 0)
+	if (val == 0)
 		return;
 
-	first_bit = find_first_bit(val, cbm_len);
-	zero_bit = find_next_zero_bit(val, cbm_len, first_bit);
+	first_bit = find_first_bit(&val, cbm_len);
+	zero_bit = find_next_zero_bit(&val, cbm_len, first_bit);
 
 	/* Clear any remaining bits to ensure contiguous region */
-	bitmap_clear(val, zero_bit, cbm_len - zero_bit);
+	bitmap_clear(&val, zero_bit, cbm_len - zero_bit);
+	*_val = (u32)val;
 }
 
 /*
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 16b1cbd3a61e..29ffa495bd1c 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -184,24 +184,25 @@ unsigned long __head __startup_64(unsigned long physaddr,
 	pgtable_flags = _KERNPG_TABLE_NOENC + sme_get_me_mask();
 
 	if (la57) {
-		p4d = fixup_pointer(early_dynamic_pgts[next_early_pgt++], physaddr);
+		p4d = fixup_pointer(early_dynamic_pgts[(*next_pgt_ptr)++],
+				    physaddr);
 
 		i = (physaddr >> PGDIR_SHIFT) % PTRS_PER_PGD;
 		pgd[i + 0] = (pgdval_t)p4d + pgtable_flags;
 		pgd[i + 1] = (pgdval_t)p4d + pgtable_flags;
 
-		i = (physaddr >> P4D_SHIFT) % PTRS_PER_P4D;
-		p4d[i + 0] = (pgdval_t)pud + pgtable_flags;
-		p4d[i + 1] = (pgdval_t)pud + pgtable_flags;
+		i = physaddr >> P4D_SHIFT;
+		p4d[(i + 0) % PTRS_PER_P4D] = (pgdval_t)pud + pgtable_flags;
+		p4d[(i + 1) % PTRS_PER_P4D] = (pgdval_t)pud + pgtable_flags;
 	} else {
 		i = (physaddr >> PGDIR_SHIFT) % PTRS_PER_PGD;
 		pgd[i + 0] = (pgdval_t)pud + pgtable_flags;
 		pgd[i + 1] = (pgdval_t)pud + pgtable_flags;
 	}
 
-	i = (physaddr >> PUD_SHIFT) % PTRS_PER_PUD;
-	pud[i + 0] = (pudval_t)pmd + pgtable_flags;
-	pud[i + 1] = (pudval_t)pmd + pgtable_flags;
+	i = physaddr >> PUD_SHIFT;
+	pud[(i + 0) % PTRS_PER_PUD] = (pudval_t)pmd + pgtable_flags;
+	pud[(i + 1) % PTRS_PER_PUD] = (pudval_t)pmd + pgtable_flags;
 
 	pmd_entry = __PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL;
 	/* Filter out unsupported __PAGE_KERNEL_* bits: */
@@ -211,8 +212,9 @@ unsigned long __head __startup_64(unsigned long physaddr,
 	pmd_entry +=  physaddr;
 
 	for (i = 0; i < DIV_ROUND_UP(_end - _text, PMD_SIZE); i++) {
-		int idx = i + (physaddr >> PMD_SHIFT) % PTRS_PER_PMD;
-		pmd[idx] = pmd_entry + i * PMD_SIZE;
+		int idx = i + (physaddr >> PMD_SHIFT);
+
+		pmd[idx % PTRS_PER_PMD] = pmd_entry + i * PMD_SIZE;
 	}
 
 	/*
diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c
index 33b66b5c5aec..72b997eaa1fc 100644
--- a/arch/x86/kernel/unwind_orc.c
+++ b/arch/x86/kernel/unwind_orc.c
@@ -82,9 +82,9 @@ static struct orc_entry *orc_find(unsigned long ip);
  * But they are copies of the ftrace entries that are static and
  * defined in ftrace_*.S, which do have orc entries.
  *
- * If the undwinder comes across a ftrace trampoline, then find the
+ * If the unwinder comes across a ftrace trampoline, then find the
  * ftrace function that was used to create it, and use that ftrace
- * function's orc entrie, as the placement of the return code in
+ * function's orc entry, as the placement of the return code in
  * the stack will be identical.
  */
 static struct orc_entry *orc_ftrace_find(unsigned long ip)
@@ -128,6 +128,16 @@ static struct orc_entry null_orc_entry = {
 	.type = ORC_TYPE_CALL
 };
 
+/* Fake frame pointer entry -- used as a fallback for generated code */
+static struct orc_entry orc_fp_entry = {
+	.type		= ORC_TYPE_CALL,
+	.sp_reg		= ORC_REG_BP,
+	.sp_offset	= 16,
+	.bp_reg		= ORC_REG_PREV_SP,
+	.bp_offset	= -16,
+	.end		= 0,
+};
+
 static struct orc_entry *orc_find(unsigned long ip)
 {
 	static struct orc_entry *orc;
@@ -392,8 +402,16 @@ bool unwind_next_frame(struct unwind_state *state)
 	 * calls and calls to noreturn functions.
 	 */
 	orc = orc_find(state->signal ? state->ip : state->ip - 1);
-	if (!orc)
-		goto err;
+	if (!orc) {
+		/*
+		 * As a fallback, try to assume this code uses a frame pointer.
+		 * This is useful for generated code, like BPF, which ORC
+		 * doesn't know about.  This is just a guess, so the rest of
+		 * the unwind is no longer considered reliable.
+		 */
+		orc = &orc_fp_entry;
+		state->error = true;
+	}
 
 	/* End-of-stack check for kernel threads: */
 	if (orc->sp_reg == ORC_REG_UNDEFINED) {
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 693aaf28d5fe..0f01c7b1d217 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -671,23 +671,25 @@ static unsigned long __meminit
 phys_p4d_init(p4d_t *p4d_page, unsigned long paddr, unsigned long paddr_end,
 	      unsigned long page_size_mask, bool init)
 {
-	unsigned long paddr_next, paddr_last = paddr_end;
-	unsigned long vaddr = (unsigned long)__va(paddr);
-	int i = p4d_index(vaddr);
+	unsigned long vaddr, vaddr_end, vaddr_next, paddr_next, paddr_last;
+
+	paddr_last = paddr_end;
+	vaddr = (unsigned long)__va(paddr);
+	vaddr_end = (unsigned long)__va(paddr_end);
 
 	if (!pgtable_l5_enabled())
 		return phys_pud_init((pud_t *) p4d_page, paddr, paddr_end,
 				     page_size_mask, init);
 
-	for (; i < PTRS_PER_P4D; i++, paddr = paddr_next) {
-		p4d_t *p4d;
+	for (; vaddr < vaddr_end; vaddr = vaddr_next) {
+		p4d_t *p4d = p4d_page + p4d_index(vaddr);
 		pud_t *pud;
 
-		vaddr = (unsigned long)__va(paddr);
-		p4d = p4d_page + p4d_index(vaddr);
-		paddr_next = (paddr & P4D_MASK) + P4D_SIZE;
+		vaddr_next = (vaddr & P4D_MASK) + P4D_SIZE;
+		paddr = __pa(vaddr);
 
 		if (paddr >= paddr_end) {
+			paddr_next = __pa(vaddr_next);
 			if (!after_bootmem &&
 			    !e820__mapped_any(paddr & P4D_MASK, paddr_next,
 					     E820_TYPE_RAM) &&
@@ -699,13 +701,13 @@ phys_p4d_init(p4d_t *p4d_page, unsigned long paddr, unsigned long paddr_end,
 
 		if (!p4d_none(*p4d)) {
 			pud = pud_offset(p4d, 0);
-			paddr_last = phys_pud_init(pud, paddr, paddr_end,
-						   page_size_mask, init);
+			paddr_last = phys_pud_init(pud, paddr, __pa(vaddr_end),
+					page_size_mask, init);
 			continue;
 		}
 
 		pud = alloc_low_page();
-		paddr_last = phys_pud_init(pud, paddr, paddr_end,
+		paddr_last = phys_pud_init(pud, paddr, __pa(vaddr_end),
 					   page_size_mask, init);
 
 		spin_lock(&init_mm.page_table_lock);

^ permalink raw reply	[flat|nested] 539+ messages in thread

* Re: [GIT PULL] x86 fixes
  2019-06-02 17:44 Ingo Molnar
@ 2019-06-02 18:15 ` pr-tracker-bot
  0 siblings, 0 replies; 539+ messages in thread
From: pr-tracker-bot @ 2019-06-02 18:15 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Linus Torvalds, linux-kernel, Thomas Gleixner, Borislav Petkov,
	Peter Zijlstra, Andrew Morton

The pull request you sent on Sun, 2 Jun 2019 19:44:42 +0200:

> git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus

has been merged into torvalds/linux.git:
https://git.kernel.org/torvalds/c/7bd1d5edd0160b615ab8748cf94dabcab1fb01cb

Thank you!

-- 
Deet-doot-dot, I am a bot.
https://korg.wiki.kernel.org/userdoc/prtracker

^ permalink raw reply	[flat|nested] 539+ messages in thread

* [GIT PULL] x86 fixes
@ 2019-06-02 17:44 Ingo Molnar
  2019-06-02 18:15 ` pr-tracker-bot
  0 siblings, 1 reply; 539+ messages in thread
From: Ingo Molnar @ 2019-06-02 17:44 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Thomas Gleixner, Borislav Petkov, Peter Zijlstra,
	Andrew Morton

Linus,

Please pull the latest x86-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus

   # HEAD: 2ac44ab608705948564791ce1d15d43ba81a1e38 x86/CPU/AMD: Don't force the CPB cap when running under a hypervisor

Two fixes: a quirk for KVM guests running on certain AMD CPUs, and a 
KASAN related build fix.

 Thanks,

	Ingo

------------------>
Ard Biesheuvel (1):
      x86/boot: Provide KASAN compatible aliases for string routines

Frank van der Linden (1):
      x86/CPU/AMD: Don't force the CPB cap when running under a hypervisor


 arch/x86/boot/compressed/string.c | 14 ++++++++++----
 arch/x86/kernel/cpu/amd.c         |  7 +++++--
 2 files changed, 15 insertions(+), 6 deletions(-)

diff --git a/arch/x86/boot/compressed/string.c b/arch/x86/boot/compressed/string.c
index 19dbbcdd1a53..81fc1eaa3229 100644
--- a/arch/x86/boot/compressed/string.c
+++ b/arch/x86/boot/compressed/string.c
@@ -11,7 +11,7 @@
 #include "../string.c"
 
 #ifdef CONFIG_X86_32
-static void *__memcpy(void *dest, const void *src, size_t n)
+static void *____memcpy(void *dest, const void *src, size_t n)
 {
 	int d0, d1, d2;
 	asm volatile(
@@ -25,7 +25,7 @@ static void *__memcpy(void *dest, const void *src, size_t n)
 	return dest;
 }
 #else
-static void *__memcpy(void *dest, const void *src, size_t n)
+static void *____memcpy(void *dest, const void *src, size_t n)
 {
 	long d0, d1, d2;
 	asm volatile(
@@ -56,7 +56,7 @@ void *memmove(void *dest, const void *src, size_t n)
 	const unsigned char *s = src;
 
 	if (d <= s || d - s >= n)
-		return __memcpy(dest, src, n);
+		return ____memcpy(dest, src, n);
 
 	while (n-- > 0)
 		d[n] = s[n];
@@ -71,5 +71,11 @@ void *memcpy(void *dest, const void *src, size_t n)
 		warn("Avoiding potentially unsafe overlapping memcpy()!");
 		return memmove(dest, src, n);
 	}
-	return __memcpy(dest, src, n);
+	return ____memcpy(dest, src, n);
 }
+
+#ifdef CONFIG_KASAN
+extern void *__memset(void *s, int c, size_t n) __alias(memset);
+extern void *__memmove(void *dest, const void *src, size_t n) __alias(memmove);
+extern void *__memcpy(void *dest, const void *src, size_t n) __alias(memcpy);
+#endif
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 80a405c2048a..8d4e50428b68 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -824,8 +824,11 @@ static void init_amd_zn(struct cpuinfo_x86 *c)
 {
 	set_cpu_cap(c, X86_FEATURE_ZEN);
 
-	/* Fix erratum 1076: CPB feature bit not being set in CPUID. */
-	if (!cpu_has(c, X86_FEATURE_CPB))
+	/*
+	 * Fix erratum 1076: CPB feature bit not being set in CPUID.
+	 * Always set it, except when running under a hypervisor.
+	 */
+	if (!cpu_has(c, X86_FEATURE_HYPERVISOR) && !cpu_has(c, X86_FEATURE_CPB))
 		set_cpu_cap(c, X86_FEATURE_CPB);
 }
 

^ permalink raw reply	[flat|nested] 539+ messages in thread

* Re: [GIT PULL] x86 fixes
  2019-05-16 16:26 Ingo Molnar
@ 2019-05-16 18:20 ` pr-tracker-bot
  0 siblings, 0 replies; 539+ messages in thread
From: pr-tracker-bot @ 2019-05-16 18:20 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Linus Torvalds, linux-kernel, Thomas Gleixner, Borislav Petkov,
	Peter Zijlstra, Andrew Morton

The pull request you sent on Thu, 16 May 2019 18:26:16 +0200:

> git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus

has been merged into torvalds/linux.git:
https://git.kernel.org/torvalds/c/d396360acdf7e57edcd9e2d080343b0353d65d63

Thank you!

-- 
Deet-doot-dot, I am a bot.
https://korg.wiki.kernel.org/userdoc/prtracker

^ permalink raw reply	[flat|nested] 539+ messages in thread

* [GIT PULL] x86 fixes
@ 2019-05-16 16:26 Ingo Molnar
  2019-05-16 18:20 ` pr-tracker-bot
  0 siblings, 1 reply; 539+ messages in thread
From: Ingo Molnar @ 2019-05-16 16:26 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Thomas Gleixner, Borislav Petkov, Peter Zijlstra,
	Andrew Morton

Linus,

Please pull the latest x86-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus

   # HEAD: 9d8d0294e78a164d407133dea05caf4b84247d6a x86/speculation/mds: Improve CPU buffer clear documentation

Misc fixes and updates:

 - a handful of MDS documentation/comment updates
 - a cleanup related to hweight interfaces
 - a SEV guest fix for large pages
 - a kprobes LTO fix
 - and a final cleanup commit for vDSO HPET support removal.


  out-of-topic modifications in x86-urgent-for-linus:
  -----------------------------------------------------
  lib/hweight.c                      # 409ca45526a4: x86/kconfig: Disable CONFIG_

 Thanks,

	Ingo

------------------>
Andi Kleen (1):
      x86/kprobes: Make trampoline_handler() global and visible

Andy Lutomirski (2):
      x86/speculation/mds: Revert CPU buffer clear on double fault exit
      x86/speculation/mds: Improve CPU buffer clear documentation

Brijesh Singh (1):
      x86/mm: Do not use set_{pud, pmd}_safe() when splitting a large page

Jia Zhang (1):
      x86/vdso: Remove hpet_page from vDSO

Masahiro Yamada (1):
      x86/kconfig: Disable CONFIG_GENERIC_HWEIGHT and remove __HAVE_ARCH_SW_HWEIGHT


 Documentation/x86/mds.rst           |  44 ++---------
 arch/x86/Kconfig                    |   3 -
 arch/x86/entry/vdso/vdso2c.c        |   3 -
 arch/x86/include/asm/arch_hweight.h |   2 -
 arch/x86/include/asm/vdso.h         |   1 -
 arch/x86/kernel/kprobes/core.c      |   2 +-
 arch/x86/kernel/traps.c             |   8 --
 arch/x86/mm/init_64.c               | 144 ++++++++++++++++++++++++++----------
 arch/x86/mm/mem_encrypt.c           |  10 ++-
 arch/x86/mm/mm_internal.h           |   3 +
 lib/hweight.c                       |   4 -
 11 files changed, 121 insertions(+), 103 deletions(-)

diff --git a/Documentation/x86/mds.rst b/Documentation/x86/mds.rst
index 534e9baa4e1d..5d4330be200f 100644
--- a/Documentation/x86/mds.rst
+++ b/Documentation/x86/mds.rst
@@ -142,45 +142,13 @@ Mitigation points
    mds_user_clear.
 
    The mitigation is invoked in prepare_exit_to_usermode() which covers
-   most of the kernel to user space transitions. There are a few exceptions
-   which are not invoking prepare_exit_to_usermode() on return to user
-   space. These exceptions use the paranoid exit code.
+   all but one of the kernel to user space transitions.  The exception
+   is when we return from a Non Maskable Interrupt (NMI), which is
+   handled directly in do_nmi().
 
-   - Non Maskable Interrupt (NMI):
-
-     Access to sensible data like keys, credentials in the NMI context is
-     mostly theoretical: The CPU can do prefetching or execute a
-     misspeculated code path and thereby fetching data which might end up
-     leaking through a buffer.
-
-     But for mounting other attacks the kernel stack address of the task is
-     already valuable information. So in full mitigation mode, the NMI is
-     mitigated on the return from do_nmi() to provide almost complete
-     coverage.
-
-   - Double fault (#DF):
-
-     A double fault is usually fatal, but the ESPFIX workaround, which can
-     be triggered from user space through modify_ldt(2) is a recoverable
-     double fault. #DF uses the paranoid exit path, so explicit mitigation
-     in the double fault handler is required.
-
-   - Machine Check Exception (#MC):
-
-     Another corner case is a #MC which hits between the CPU buffer clear
-     invocation and the actual return to user. As this still is in kernel
-     space it takes the paranoid exit path which does not clear the CPU
-     buffers. So the #MC handler repopulates the buffers to some
-     extent. Machine checks are not reliably controllable and the window is
-     extremly small so mitigation would just tick a checkbox that this
-     theoretical corner case is covered. To keep the amount of special
-     cases small, ignore #MC.
-
-   - Debug Exception (#DB):
-
-     This takes the paranoid exit path only when the INT1 breakpoint is in
-     kernel space. #DB on a user space address takes the regular exit path,
-     so no extra mitigation required.
+   (The reason that NMI is special is that prepare_exit_to_usermode() can
+    enable IRQs.  In NMI context, NMIs are blocked, and we don't want to
+    enable IRQs with NMIs blocked.)
 
 
 2. C-State transition
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 326b2d5bab9d..6bc9dd6e7534 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -259,9 +259,6 @@ config GENERIC_BUG
 config GENERIC_BUG_RELATIVE_POINTERS
 	bool
 
-config GENERIC_HWEIGHT
-	def_bool y
-
 config ARCH_MAY_HAVE_PC_FDC
 	def_bool y
 	depends on ISA_DMA_API
diff --git a/arch/x86/entry/vdso/vdso2c.c b/arch/x86/entry/vdso/vdso2c.c
index 8e470b018512..3a4d8d4d39f8 100644
--- a/arch/x86/entry/vdso/vdso2c.c
+++ b/arch/x86/entry/vdso/vdso2c.c
@@ -73,14 +73,12 @@ const char *outfilename;
 enum {
 	sym_vvar_start,
 	sym_vvar_page,
-	sym_hpet_page,
 	sym_pvclock_page,
 	sym_hvclock_page,
 };
 
 const int special_pages[] = {
 	sym_vvar_page,
-	sym_hpet_page,
 	sym_pvclock_page,
 	sym_hvclock_page,
 };
@@ -93,7 +91,6 @@ struct vdso_sym {
 struct vdso_sym required_syms[] = {
 	[sym_vvar_start] = {"vvar_start", true},
 	[sym_vvar_page] = {"vvar_page", true},
-	[sym_hpet_page] = {"hpet_page", true},
 	[sym_pvclock_page] = {"pvclock_page", true},
 	[sym_hvclock_page] = {"hvclock_page", true},
 	{"VDSO32_NOTE_MASK", true},
diff --git a/arch/x86/include/asm/arch_hweight.h b/arch/x86/include/asm/arch_hweight.h
index fc0693569f7a..ba88edd0d58b 100644
--- a/arch/x86/include/asm/arch_hweight.h
+++ b/arch/x86/include/asm/arch_hweight.h
@@ -12,8 +12,6 @@
 #define REG_OUT "a"
 #endif
 
-#define __HAVE_ARCH_SW_HWEIGHT
-
 static __always_inline unsigned int __arch_hweight32(unsigned int w)
 {
 	unsigned int res;
diff --git a/arch/x86/include/asm/vdso.h b/arch/x86/include/asm/vdso.h
index 27566e57e87d..230474e2ddb5 100644
--- a/arch/x86/include/asm/vdso.h
+++ b/arch/x86/include/asm/vdso.h
@@ -19,7 +19,6 @@ struct vdso_image {
 	long sym_vvar_start;  /* Negative offset to the vvar area */
 
 	long sym_vvar_page;
-	long sym_hpet_page;
 	long sym_pvclock_page;
 	long sym_hvclock_page;
 	long sym_VDSO32_NOTE_MASK;
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
index cf52ee0d8711..9e4fa2484d10 100644
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -768,7 +768,7 @@ static struct kprobe kretprobe_kprobe = {
 /*
  * Called from kretprobe_trampoline
  */
-static __used void *trampoline_handler(struct pt_regs *regs)
+__used __visible void *trampoline_handler(struct pt_regs *regs)
 {
 	struct kprobe_ctlblk *kcb;
 	struct kretprobe_instance *ri = NULL;
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 7de466eb960b..8b6d03e55d2f 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -58,7 +58,6 @@
 #include <asm/alternative.h>
 #include <asm/fpu/xstate.h>
 #include <asm/trace/mpx.h>
-#include <asm/nospec-branch.h>
 #include <asm/mpx.h>
 #include <asm/vm86.h>
 #include <asm/umip.h>
@@ -368,13 +367,6 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
 		regs->ip = (unsigned long)general_protection;
 		regs->sp = (unsigned long)&gpregs->orig_ax;
 
-		/*
-		 * This situation can be triggered by userspace via
-		 * modify_ldt(2) and the return does not take the regular
-		 * user space exit, so a CPU buffer clear is required when
-		 * MDS mitigation is enabled.
-		 */
-		mds_user_clear_cpu_buffers();
 		return;
 	}
 #endif
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 20d14254b686..62fc457f3849 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -58,6 +58,37 @@
 
 #include "ident_map.c"
 
+#define DEFINE_POPULATE(fname, type1, type2, init)		\
+static inline void fname##_init(struct mm_struct *mm,		\
+		type1##_t *arg1, type2##_t *arg2, bool init)	\
+{								\
+	if (init)						\
+		fname##_safe(mm, arg1, arg2);			\
+	else							\
+		fname(mm, arg1, arg2);				\
+}
+
+DEFINE_POPULATE(p4d_populate, p4d, pud, init)
+DEFINE_POPULATE(pgd_populate, pgd, p4d, init)
+DEFINE_POPULATE(pud_populate, pud, pmd, init)
+DEFINE_POPULATE(pmd_populate_kernel, pmd, pte, init)
+
+#define DEFINE_ENTRY(type1, type2, init)			\
+static inline void set_##type1##_init(type1##_t *arg1,		\
+			type2##_t arg2, bool init)		\
+{								\
+	if (init)						\
+		set_##type1##_safe(arg1, arg2);			\
+	else							\
+		set_##type1(arg1, arg2);			\
+}
+
+DEFINE_ENTRY(p4d, p4d, init)
+DEFINE_ENTRY(pud, pud, init)
+DEFINE_ENTRY(pmd, pmd, init)
+DEFINE_ENTRY(pte, pte, init)
+
+
 /*
  * NOTE: pagetable_init alloc all the fixmap pagetables contiguous on the
  * physical space so we can cache the place of the first one and move
@@ -414,7 +445,7 @@ void __init cleanup_highmap(void)
  */
 static unsigned long __meminit
 phys_pte_init(pte_t *pte_page, unsigned long paddr, unsigned long paddr_end,
-	      pgprot_t prot)
+	      pgprot_t prot, bool init)
 {
 	unsigned long pages = 0, paddr_next;
 	unsigned long paddr_last = paddr_end;
@@ -432,7 +463,7 @@ phys_pte_init(pte_t *pte_page, unsigned long paddr, unsigned long paddr_end,
 					     E820_TYPE_RAM) &&
 			    !e820__mapped_any(paddr & PAGE_MASK, paddr_next,
 					     E820_TYPE_RESERVED_KERN))
-				set_pte_safe(pte, __pte(0));
+				set_pte_init(pte, __pte(0), init);
 			continue;
 		}
 
@@ -452,7 +483,7 @@ phys_pte_init(pte_t *pte_page, unsigned long paddr, unsigned long paddr_end,
 			pr_info("   pte=%p addr=%lx pte=%016lx\n", pte, paddr,
 				pfn_pte(paddr >> PAGE_SHIFT, PAGE_KERNEL).pte);
 		pages++;
-		set_pte_safe(pte, pfn_pte(paddr >> PAGE_SHIFT, prot));
+		set_pte_init(pte, pfn_pte(paddr >> PAGE_SHIFT, prot), init);
 		paddr_last = (paddr & PAGE_MASK) + PAGE_SIZE;
 	}
 
@@ -468,7 +499,7 @@ phys_pte_init(pte_t *pte_page, unsigned long paddr, unsigned long paddr_end,
  */
 static unsigned long __meminit
 phys_pmd_init(pmd_t *pmd_page, unsigned long paddr, unsigned long paddr_end,
-	      unsigned long page_size_mask, pgprot_t prot)
+	      unsigned long page_size_mask, pgprot_t prot, bool init)
 {
 	unsigned long pages = 0, paddr_next;
 	unsigned long paddr_last = paddr_end;
@@ -487,7 +518,7 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long paddr, unsigned long paddr_end,
 					     E820_TYPE_RAM) &&
 			    !e820__mapped_any(paddr & PMD_MASK, paddr_next,
 					     E820_TYPE_RESERVED_KERN))
-				set_pmd_safe(pmd, __pmd(0));
+				set_pmd_init(pmd, __pmd(0), init);
 			continue;
 		}
 
@@ -496,7 +527,8 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long paddr, unsigned long paddr_end,
 				spin_lock(&init_mm.page_table_lock);
 				pte = (pte_t *)pmd_page_vaddr(*pmd);
 				paddr_last = phys_pte_init(pte, paddr,
-							   paddr_end, prot);
+							   paddr_end, prot,
+							   init);
 				spin_unlock(&init_mm.page_table_lock);
 				continue;
 			}
@@ -524,19 +556,20 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long paddr, unsigned long paddr_end,
 		if (page_size_mask & (1<<PG_LEVEL_2M)) {
 			pages++;
 			spin_lock(&init_mm.page_table_lock);
-			set_pte_safe((pte_t *)pmd,
-				pfn_pte((paddr & PMD_MASK) >> PAGE_SHIFT,
-					__pgprot(pgprot_val(prot) | _PAGE_PSE)));
+			set_pte_init((pte_t *)pmd,
+				     pfn_pte((paddr & PMD_MASK) >> PAGE_SHIFT,
+					     __pgprot(pgprot_val(prot) | _PAGE_PSE)),
+				     init);
 			spin_unlock(&init_mm.page_table_lock);
 			paddr_last = paddr_next;
 			continue;
 		}
 
 		pte = alloc_low_page();
-		paddr_last = phys_pte_init(pte, paddr, paddr_end, new_prot);
+		paddr_last = phys_pte_init(pte, paddr, paddr_end, new_prot, init);
 
 		spin_lock(&init_mm.page_table_lock);
-		pmd_populate_kernel_safe(&init_mm, pmd, pte);
+		pmd_populate_kernel_init(&init_mm, pmd, pte, init);
 		spin_unlock(&init_mm.page_table_lock);
 	}
 	update_page_count(PG_LEVEL_2M, pages);
@@ -551,7 +584,7 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long paddr, unsigned long paddr_end,
  */
 static unsigned long __meminit
 phys_pud_init(pud_t *pud_page, unsigned long paddr, unsigned long paddr_end,
-	      unsigned long page_size_mask)
+	      unsigned long page_size_mask, bool init)
 {
 	unsigned long pages = 0, paddr_next;
 	unsigned long paddr_last = paddr_end;
@@ -573,7 +606,7 @@ phys_pud_init(pud_t *pud_page, unsigned long paddr, unsigned long paddr_end,
 					     E820_TYPE_RAM) &&
 			    !e820__mapped_any(paddr & PUD_MASK, paddr_next,
 					     E820_TYPE_RESERVED_KERN))
-				set_pud_safe(pud, __pud(0));
+				set_pud_init(pud, __pud(0), init);
 			continue;
 		}
 
@@ -583,7 +616,7 @@ phys_pud_init(pud_t *pud_page, unsigned long paddr, unsigned long paddr_end,
 				paddr_last = phys_pmd_init(pmd, paddr,
 							   paddr_end,
 							   page_size_mask,
-							   prot);
+							   prot, init);
 				continue;
 			}
 			/*
@@ -610,9 +643,10 @@ phys_pud_init(pud_t *pud_page, unsigned long paddr, unsigned long paddr_end,
 		if (page_size_mask & (1<<PG_LEVEL_1G)) {
 			pages++;
 			spin_lock(&init_mm.page_table_lock);
-			set_pte_safe((pte_t *)pud,
-				pfn_pte((paddr & PUD_MASK) >> PAGE_SHIFT,
-					PAGE_KERNEL_LARGE));
+			set_pte_init((pte_t *)pud,
+				     pfn_pte((paddr & PUD_MASK) >> PAGE_SHIFT,
+					     PAGE_KERNEL_LARGE),
+				     init);
 			spin_unlock(&init_mm.page_table_lock);
 			paddr_last = paddr_next;
 			continue;
@@ -620,10 +654,10 @@ phys_pud_init(pud_t *pud_page, unsigned long paddr, unsigned long paddr_end,
 
 		pmd = alloc_low_page();
 		paddr_last = phys_pmd_init(pmd, paddr, paddr_end,
-					   page_size_mask, prot);
+					   page_size_mask, prot, init);
 
 		spin_lock(&init_mm.page_table_lock);
-		pud_populate_safe(&init_mm, pud, pmd);
+		pud_populate_init(&init_mm, pud, pmd, init);
 		spin_unlock(&init_mm.page_table_lock);
 	}
 
@@ -634,14 +668,15 @@ phys_pud_init(pud_t *pud_page, unsigned long paddr, unsigned long paddr_end,
 
 static unsigned long __meminit
 phys_p4d_init(p4d_t *p4d_page, unsigned long paddr, unsigned long paddr_end,
-	      unsigned long page_size_mask)
+	      unsigned long page_size_mask, bool init)
 {
 	unsigned long paddr_next, paddr_last = paddr_end;
 	unsigned long vaddr = (unsigned long)__va(paddr);
 	int i = p4d_index(vaddr);
 
 	if (!pgtable_l5_enabled())
-		return phys_pud_init((pud_t *) p4d_page, paddr, paddr_end, page_size_mask);
+		return phys_pud_init((pud_t *) p4d_page, paddr, paddr_end,
+				     page_size_mask, init);
 
 	for (; i < PTRS_PER_P4D; i++, paddr = paddr_next) {
 		p4d_t *p4d;
@@ -657,39 +692,34 @@ phys_p4d_init(p4d_t *p4d_page, unsigned long paddr, unsigned long paddr_end,
 					     E820_TYPE_RAM) &&
 			    !e820__mapped_any(paddr & P4D_MASK, paddr_next,
 					     E820_TYPE_RESERVED_KERN))
-				set_p4d_safe(p4d, __p4d(0));
+				set_p4d_init(p4d, __p4d(0), init);
 			continue;
 		}
 
 		if (!p4d_none(*p4d)) {
 			pud = pud_offset(p4d, 0);
-			paddr_last = phys_pud_init(pud, paddr,
-					paddr_end,
-					page_size_mask);
+			paddr_last = phys_pud_init(pud, paddr, paddr_end,
+						   page_size_mask, init);
 			continue;
 		}
 
 		pud = alloc_low_page();
 		paddr_last = phys_pud_init(pud, paddr, paddr_end,
-					   page_size_mask);
+					   page_size_mask, init);
 
 		spin_lock(&init_mm.page_table_lock);
-		p4d_populate_safe(&init_mm, p4d, pud);
+		p4d_populate_init(&init_mm, p4d, pud, init);
 		spin_unlock(&init_mm.page_table_lock);
 	}
 
 	return paddr_last;
 }
 
-/*
- * Create page table mapping for the physical memory for specific physical
- * addresses. The virtual and physical addresses have to be aligned on PMD level
- * down. It returns the last physical address mapped.
- */
-unsigned long __meminit
-kernel_physical_mapping_init(unsigned long paddr_start,
-			     unsigned long paddr_end,
-			     unsigned long page_size_mask)
+static unsigned long __meminit
+__kernel_physical_mapping_init(unsigned long paddr_start,
+			       unsigned long paddr_end,
+			       unsigned long page_size_mask,
+			       bool init)
 {
 	bool pgd_changed = false;
 	unsigned long vaddr, vaddr_start, vaddr_end, vaddr_next, paddr_last;
@@ -709,19 +739,22 @@ kernel_physical_mapping_init(unsigned long paddr_start,
 			p4d = (p4d_t *)pgd_page_vaddr(*pgd);
 			paddr_last = phys_p4d_init(p4d, __pa(vaddr),
 						   __pa(vaddr_end),
-						   page_size_mask);
+						   page_size_mask,
+						   init);
 			continue;
 		}
 
 		p4d = alloc_low_page();
 		paddr_last = phys_p4d_init(p4d, __pa(vaddr), __pa(vaddr_end),
-					   page_size_mask);
+					   page_size_mask, init);
 
 		spin_lock(&init_mm.page_table_lock);
 		if (pgtable_l5_enabled())
-			pgd_populate_safe(&init_mm, pgd, p4d);
+			pgd_populate_init(&init_mm, pgd, p4d, init);
 		else
-			p4d_populate_safe(&init_mm, p4d_offset(pgd, vaddr), (pud_t *) p4d);
+			p4d_populate_init(&init_mm, p4d_offset(pgd, vaddr),
+					  (pud_t *) p4d, init);
+
 		spin_unlock(&init_mm.page_table_lock);
 		pgd_changed = true;
 	}
@@ -732,6 +765,37 @@ kernel_physical_mapping_init(unsigned long paddr_start,
 	return paddr_last;
 }
 
+
+/*
+ * Create page table mapping for the physical memory for specific physical
+ * addresses. Note that it can only be used to populate non-present entries.
+ * The virtual and physical addresses have to be aligned on PMD level
+ * down. It returns the last physical address mapped.
+ */
+unsigned long __meminit
+kernel_physical_mapping_init(unsigned long paddr_start,
+			     unsigned long paddr_end,
+			     unsigned long page_size_mask)
+{
+	return __kernel_physical_mapping_init(paddr_start, paddr_end,
+					      page_size_mask, true);
+}
+
+/*
+ * This function is similar to kernel_physical_mapping_init() above with the
+ * exception that it uses set_{pud,pmd}() instead of the set_{pud,pte}_safe()
+ * when updating the mapping. The caller is responsible to flush the TLBs after
+ * the function returns.
+ */
+unsigned long __meminit
+kernel_physical_mapping_change(unsigned long paddr_start,
+			       unsigned long paddr_end,
+			       unsigned long page_size_mask)
+{
+	return __kernel_physical_mapping_init(paddr_start, paddr_end,
+					      page_size_mask, false);
+}
+
 #ifndef CONFIG_NUMA
 void __init initmem_init(void)
 {
diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c
index 385afa2b9e17..51f50a7a07ef 100644
--- a/arch/x86/mm/mem_encrypt.c
+++ b/arch/x86/mm/mem_encrypt.c
@@ -301,9 +301,13 @@ static int __init early_set_memory_enc_dec(unsigned long vaddr,
 		else
 			split_page_size_mask = 1 << PG_LEVEL_2M;
 
-		kernel_physical_mapping_init(__pa(vaddr & pmask),
-					     __pa((vaddr_end & pmask) + psize),
-					     split_page_size_mask);
+		/*
+		 * kernel_physical_mapping_change() does not flush the TLBs, so
+		 * a TLB flush is required after we exit from the for loop.
+		 */
+		kernel_physical_mapping_change(__pa(vaddr & pmask),
+					       __pa((vaddr_end & pmask) + psize),
+					       split_page_size_mask);
 	}
 
 	ret = 0;
diff --git a/arch/x86/mm/mm_internal.h b/arch/x86/mm/mm_internal.h
index 319bde386d5f..eeae142062ed 100644
--- a/arch/x86/mm/mm_internal.h
+++ b/arch/x86/mm/mm_internal.h
@@ -13,6 +13,9 @@ void early_ioremap_page_table_range_init(void);
 unsigned long kernel_physical_mapping_init(unsigned long start,
 					     unsigned long end,
 					     unsigned long page_size_mask);
+unsigned long kernel_physical_mapping_change(unsigned long start,
+					     unsigned long end,
+					     unsigned long page_size_mask);
 void zone_sizes_init(void);
 
 extern int after_bootmem;
diff --git a/lib/hweight.c b/lib/hweight.c
index 7660d88fd496..c94586b62551 100644
--- a/lib/hweight.c
+++ b/lib/hweight.c
@@ -10,7 +10,6 @@
  * The Hamming Weight of a number is the total number of bits set in it.
  */
 
-#ifndef __HAVE_ARCH_SW_HWEIGHT
 unsigned int __sw_hweight32(unsigned int w)
 {
 #ifdef CONFIG_ARCH_HAS_FAST_MULTIPLIER
@@ -27,7 +26,6 @@ unsigned int __sw_hweight32(unsigned int w)
 #endif
 }
 EXPORT_SYMBOL(__sw_hweight32);
-#endif
 
 unsigned int __sw_hweight16(unsigned int w)
 {
@@ -46,7 +44,6 @@ unsigned int __sw_hweight8(unsigned int w)
 }
 EXPORT_SYMBOL(__sw_hweight8);
 
-#ifndef __HAVE_ARCH_SW_HWEIGHT
 unsigned long __sw_hweight64(__u64 w)
 {
 #if BITS_PER_LONG == 32
@@ -69,4 +66,3 @@ unsigned long __sw_hweight64(__u64 w)
 #endif
 }
 EXPORT_SYMBOL(__sw_hweight64);
-#endif

^ permalink raw reply	[flat|nested] 539+ messages in thread

* Re: [GIT PULL] x86 fixes
  2019-04-27 14:42 Ingo Molnar
@ 2019-04-27 18:45 ` pr-tracker-bot
  0 siblings, 0 replies; 539+ messages in thread
From: pr-tracker-bot @ 2019-04-27 18:45 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Linus Torvalds, linux-kernel, Thomas Gleixner, Borislav Petkov,
	Peter Zijlstra, Andrew Morton

The pull request you sent on Sat, 27 Apr 2019 16:42:56 +0200:

> git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus

has been merged into torvalds/linux.git:
https://git.kernel.org/torvalds/c/037904a22bf8b2c999a6e2a8ba971b549c1e9600

Thank you!

-- 
Deet-doot-dot, I am a bot.
https://korg.wiki.kernel.org/userdoc/prtracker

^ permalink raw reply	[flat|nested] 539+ messages in thread

* [GIT PULL] x86 fixes
@ 2019-04-27 14:42 Ingo Molnar
  2019-04-27 18:45 ` pr-tracker-bot
  0 siblings, 1 reply; 539+ messages in thread
From: Ingo Molnar @ 2019-04-27 14:42 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Thomas Gleixner, Borislav Petkov, Peter Zijlstra,
	Andrew Morton

Linus,

Please pull the latest x86-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus

   # HEAD: 0d02113b31b2017dd349ec9df2314e798a90fa6e x86/mm: Fix a crash with kmemleak_scan()

Two fixes:

 - Fix an early boot crash in the RSDP parsing code by effectively 
   turning off the parsing call - we ran out of time but want to fix the
   regression. The more involved fix is being worked on.

 - Fix a crash that can trigger in the kmemlek code.

 Thanks,

	Ingo

------------------>
Borislav Petkov (1):
      x86/boot: Disable RSDP parsing temporarily

Qian Cai (1):
      x86/mm: Fix a crash with kmemleak_scan()


 arch/x86/boot/compressed/misc.c | 2 +-
 arch/x86/mm/init.c              | 6 ++++++
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index c0d6c560df69..5a237e8dbf8d 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -352,7 +352,7 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap,
 	boot_params->hdr.loadflags &= ~KASLR_FLAG;
 
 	/* Save RSDP address for later use. */
-	boot_params->acpi_rsdp_addr = get_rsdp_addr();
+	/* boot_params->acpi_rsdp_addr = get_rsdp_addr(); */
 
 	sanitize_boot_params(boot_params);
 
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index f905a2371080..8dacdb96899e 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -5,6 +5,7 @@
 #include <linux/memblock.h>
 #include <linux/swapfile.h>
 #include <linux/swapops.h>
+#include <linux/kmemleak.h>
 
 #include <asm/set_memory.h>
 #include <asm/e820/api.h>
@@ -766,6 +767,11 @@ void free_init_pages(const char *what, unsigned long begin, unsigned long end)
 	if (debug_pagealloc_enabled()) {
 		pr_info("debug: unmapping init [mem %#010lx-%#010lx]\n",
 			begin, end - 1);
+		/*
+		 * Inform kmemleak about the hole in the memory since the
+		 * corresponding pages will be unmapped.
+		 */
+		kmemleak_free_part((void *)begin, end - begin);
 		set_memory_np(begin, (end - begin) >> PAGE_SHIFT);
 	} else {
 		/*

^ permalink raw reply	[flat|nested] 539+ messages in thread

* Re: [GIT PULL] x86 fixes
  2019-04-20  7:38 Ingo Molnar
@ 2019-04-20 19:25 ` pr-tracker-bot
  0 siblings, 0 replies; 539+ messages in thread
From: pr-tracker-bot @ 2019-04-20 19:25 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Linus Torvalds, linux-kernel, Thomas Gleixner, Borislav Petkov,
	Peter Zijlstra, Andrew Morton

The pull request you sent on Sat, 20 Apr 2019 09:38:09 +0200:

> git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus

has been merged into torvalds/linux.git:
https://git.kernel.org/torvalds/c/1fd91d719eb1ae83ef500eb4148d11db9db39a41

Thank you!

-- 
Deet-doot-dot, I am a bot.
https://korg.wiki.kernel.org/userdoc/prtracker

^ permalink raw reply	[flat|nested] 539+ messages in thread

* [GIT PULL] x86 fixes
@ 2019-04-20  7:38 Ingo Molnar
  2019-04-20 19:25 ` pr-tracker-bot
  0 siblings, 1 reply; 539+ messages in thread
From: Ingo Molnar @ 2019-04-20  7:38 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Thomas Gleixner, Borislav Petkov, Peter Zijlstra,
	Andrew Morton

Linus,

Please pull the latest x86-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus

   # HEAD: 2ee27796f298b710992a677a7e4d35c8c588b17e x86/cpu/intel: Lower the "ENERGY_PERF_BIAS: Set to normal" message's log priority

Misc fixes all over the place: a console spam fix, section attributes 
fixes, a KASLR fix, a TLB stack-variable alignment fix, a reboot quirk, 
boot options related warnings fix, an LTO fix, a deadlock fix and an RDT 
fix.

 Thanks,

	Ingo

------------------>
Andi Kleen (1):
      x86/cpu/bugs: Use __initconst for 'const' init data

Baoquan He (1):
      x86/mm/KASLR: Fix the size of the direct mapping section

Colin Ian King (1):
      x86/Kconfig: Fix spelling mistake "effectivness" -> "effectiveness"

Hans de Goede (1):
      x86/cpu/intel: Lower the "ENERGY_PERF_BIAS: Set to normal" message's log priority

Jian-Hong Pan (1):
      x86/reboot, efi: Use EFI reboot for Acer TravelMate X514-51T

Peter Zijlstra (1):
      x86/mm/tlb: Revert "x86/mm: Align TLB invalidation info"

Sami Tolvanen (1):
      x86/build/lto: Fix truncated .bss with -fdata-sections

Thomas Gleixner (2):
      x86/speculation: Prevent deadlock on ssb_state::lock
      x86/mm: Prevent bogus warnings with "noexec=off"

Xiaochen Shen (1):
      x86/resctrl: Do not repeat rdtgroup mode initialization


 arch/x86/Kconfig                       |  2 +-
 arch/x86/kernel/cpu/bugs.c             |  6 +++---
 arch/x86/kernel/cpu/intel.c            |  4 ++--
 arch/x86/kernel/cpu/resctrl/rdtgroup.c |  3 ++-
 arch/x86/kernel/process.c              |  8 ++++++--
 arch/x86/kernel/reboot.c               | 21 +++++++++++++++++++++
 arch/x86/kernel/vmlinux.lds.S          |  2 +-
 arch/x86/mm/dump_pagetables.c          |  3 ++-
 arch/x86/mm/ioremap.c                  |  2 +-
 arch/x86/mm/kaslr.c                    |  2 +-
 arch/x86/mm/tlb.c                      |  2 +-
 include/linux/efi.h                    |  7 ++++++-
 12 files changed, 47 insertions(+), 15 deletions(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index c1f9b3cf437c..01dbb05bc498 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1499,7 +1499,7 @@ config X86_CPA_STATISTICS
 	depends on DEBUG_FS
 	---help---
 	  Expose statistics about the Change Page Attribute mechanims, which
-	  helps to determine the effectivness of preserving large and huge
+	  helps to determine the effectiveness of preserving large and huge
 	  page mappings when mapping protections are changed.
 
 config ARCH_HAS_MEM_ENCRYPT
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 2da82eff0eb4..b91b3bfa5cfb 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -275,7 +275,7 @@ static const struct {
 	const char			*option;
 	enum spectre_v2_user_cmd	cmd;
 	bool				secure;
-} v2_user_options[] __initdata = {
+} v2_user_options[] __initconst = {
 	{ "auto",		SPECTRE_V2_USER_CMD_AUTO,		false },
 	{ "off",		SPECTRE_V2_USER_CMD_NONE,		false },
 	{ "on",			SPECTRE_V2_USER_CMD_FORCE,		true  },
@@ -419,7 +419,7 @@ static const struct {
 	const char *option;
 	enum spectre_v2_mitigation_cmd cmd;
 	bool secure;
-} mitigation_options[] __initdata = {
+} mitigation_options[] __initconst = {
 	{ "off",		SPECTRE_V2_CMD_NONE,		  false },
 	{ "on",			SPECTRE_V2_CMD_FORCE,		  true  },
 	{ "retpoline",		SPECTRE_V2_CMD_RETPOLINE,	  false },
@@ -658,7 +658,7 @@ static const char * const ssb_strings[] = {
 static const struct {
 	const char *option;
 	enum ssb_mitigation_cmd cmd;
-} ssb_mitigation_options[]  __initdata = {
+} ssb_mitigation_options[]  __initconst = {
 	{ "auto",	SPEC_STORE_BYPASS_CMD_AUTO },    /* Platform decides */
 	{ "on",		SPEC_STORE_BYPASS_CMD_ON },      /* Disable Speculative Store Bypass */
 	{ "off",	SPEC_STORE_BYPASS_CMD_NONE },    /* Don't touch Speculative Store Bypass */
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index fc3c07fe7df5..3142fd7a9b32 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -611,8 +611,8 @@ static void init_intel_energy_perf(struct cpuinfo_x86 *c)
 	if ((epb & 0xF) != ENERGY_PERF_BIAS_PERFORMANCE)
 		return;
 
-	pr_warn_once("ENERGY_PERF_BIAS: Set to 'normal', was 'performance'\n");
-	pr_warn_once("ENERGY_PERF_BIAS: View and update with x86_energy_perf_policy(8)\n");
+	pr_info_once("ENERGY_PERF_BIAS: Set to 'normal', was 'performance'\n");
+	pr_info_once("ENERGY_PERF_BIAS: View and update with x86_energy_perf_policy(8)\n");
 	epb = (epb & ~0xF) | ENERGY_PERF_BIAS_NORMAL;
 	wrmsrl(MSR_IA32_ENERGY_PERF_BIAS, epb);
 }
diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
index 54b9eef3eea9..85212a32b54d 100644
--- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
+++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
@@ -2610,9 +2610,10 @@ static int rdtgroup_init_alloc(struct rdtgroup *rdtgrp)
 			rdt_last_cmd_puts("Failed to initialize allocations\n");
 			return ret;
 		}
-		rdtgrp->mode = RDT_MODE_SHAREABLE;
 	}
 
+	rdtgrp->mode = RDT_MODE_SHAREABLE;
+
 	return 0;
 }
 
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 58ac7be52c7a..957eae13b370 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -426,6 +426,8 @@ static __always_inline void __speculation_ctrl_update(unsigned long tifp,
 	u64 msr = x86_spec_ctrl_base;
 	bool updmsr = false;
 
+	lockdep_assert_irqs_disabled();
+
 	/*
 	 * If TIF_SSBD is different, select the proper mitigation
 	 * method. Note that if SSBD mitigation is disabled or permanentely
@@ -477,10 +479,12 @@ static unsigned long speculation_ctrl_update_tif(struct task_struct *tsk)
 
 void speculation_ctrl_update(unsigned long tif)
 {
+	unsigned long flags;
+
 	/* Forced update. Make sure all relevant TIF flags are different */
-	preempt_disable();
+	local_irq_save(flags);
 	__speculation_ctrl_update(~tif, tif);
-	preempt_enable();
+	local_irq_restore(flags);
 }
 
 /* Called from seccomp/prctl update */
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 725624b6c0c0..8fd3cedd9acc 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -81,6 +81,19 @@ static int __init set_bios_reboot(const struct dmi_system_id *d)
 	return 0;
 }
 
+/*
+ * Some machines don't handle the default ACPI reboot method and
+ * require the EFI reboot method:
+ */
+static int __init set_efi_reboot(const struct dmi_system_id *d)
+{
+	if (reboot_type != BOOT_EFI && !efi_runtime_disabled()) {
+		reboot_type = BOOT_EFI;
+		pr_info("%s series board detected. Selecting EFI-method for reboot.\n", d->ident);
+	}
+	return 0;
+}
+
 void __noreturn machine_real_restart(unsigned int type)
 {
 	local_irq_disable();
@@ -166,6 +179,14 @@ static const struct dmi_system_id reboot_dmi_table[] __initconst = {
 			DMI_MATCH(DMI_PRODUCT_NAME, "AOA110"),
 		},
 	},
+	{	/* Handle reboot issue on Acer TravelMate X514-51T */
+		.callback = set_efi_reboot,
+		.ident = "Acer TravelMate X514-51T",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "Acer"),
+			DMI_MATCH(DMI_PRODUCT_NAME, "TravelMate X514-51T"),
+		},
+	},
 
 	/* Apple */
 	{	/* Handle problems with rebooting on Apple MacBook5 */
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index bad8c51fee6e..a5127b2c195f 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -362,7 +362,7 @@ SECTIONS
 	.bss : AT(ADDR(.bss) - LOAD_OFFSET) {
 		__bss_start = .;
 		*(.bss..page_aligned)
-		*(.bss)
+		*(BSS_MAIN)
 		BSS_DECRYPTED
 		. = ALIGN(PAGE_SIZE);
 		__bss_stop = .;
diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c
index ee8f8ab46941..c0309ea9abee 100644
--- a/arch/x86/mm/dump_pagetables.c
+++ b/arch/x86/mm/dump_pagetables.c
@@ -259,7 +259,8 @@ static void note_wx(struct pg_state *st)
 #endif
 	/* Account the WX pages */
 	st->wx_pages += npages;
-	WARN_ONCE(1, "x86/mm: Found insecure W+X mapping at address %pS\n",
+	WARN_ONCE(__supported_pte_mask & _PAGE_NX,
+		  "x86/mm: Found insecure W+X mapping at address %pS\n",
 		  (void *)st->start_address);
 }
 
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index 0029604af8a4..dd73d5d74393 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -825,7 +825,7 @@ void __init __early_set_fixmap(enum fixed_addresses idx,
 	pte = early_ioremap_pte(addr);
 
 	/* Sanitize 'prot' against any unsupported bits: */
-	pgprot_val(flags) &= __default_kernel_pte_mask;
+	pgprot_val(flags) &= __supported_pte_mask;
 
 	if (pgprot_val(flags))
 		set_pte(pte, pfn_pte(phys >> PAGE_SHIFT, flags));
diff --git a/arch/x86/mm/kaslr.c b/arch/x86/mm/kaslr.c
index 3f452ffed7e9..d669c5e797e0 100644
--- a/arch/x86/mm/kaslr.c
+++ b/arch/x86/mm/kaslr.c
@@ -94,7 +94,7 @@ void __init kernel_randomize_memory(void)
 	if (!kaslr_memory_enabled())
 		return;
 
-	kaslr_regions[0].size_tb = 1 << (__PHYSICAL_MASK_SHIFT - TB_SHIFT);
+	kaslr_regions[0].size_tb = 1 << (MAX_PHYSMEM_BITS - TB_SHIFT);
 	kaslr_regions[1].size_tb = VMALLOC_SIZE_TB;
 
 	/*
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index bc4bc7b2f075..487b8474c01c 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -728,7 +728,7 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
 {
 	int cpu;
 
-	struct flush_tlb_info info __aligned(SMP_CACHE_BYTES) = {
+	struct flush_tlb_info info = {
 		.mm = mm,
 		.stride_shift = stride_shift,
 		.freed_tables = freed_tables,
diff --git a/include/linux/efi.h b/include/linux/efi.h
index 54357a258b35..6ebc2098cfe1 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -1611,7 +1611,12 @@ efi_status_t efi_setup_gop(efi_system_table_t *sys_table_arg,
 			   struct screen_info *si, efi_guid_t *proto,
 			   unsigned long size);
 
-bool efi_runtime_disabled(void);
+#ifdef CONFIG_EFI
+extern bool efi_runtime_disabled(void);
+#else
+static inline bool efi_runtime_disabled(void) { return true; }
+#endif
+
 extern void efi_call_virt_check_flags(unsigned long flags, const char *call);
 extern unsigned long efi_call_virt_save_flags(void);
 

^ permalink raw reply	[flat|nested] 539+ messages in thread

* Re: [GIT PULL] x86 fixes
  2019-04-12 13:10 Ingo Molnar
@ 2019-04-13  4:05 ` pr-tracker-bot
  0 siblings, 0 replies; 539+ messages in thread
From: pr-tracker-bot @ 2019-04-13  4:05 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Linus Torvalds, linux-kernel, Thomas Gleixner, Borislav Petkov,
	Peter Zijlstra, Andrew Morton

The pull request you sent on Fri, 12 Apr 2019 15:10:42 +0200:

> git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus

has been merged into torvalds/linux.git:
https://git.kernel.org/torvalds/c/6d0a598489ca687e1ebac37eef546526eca41347

Thank you!

-- 
Deet-doot-dot, I am a bot.
https://korg.wiki.kernel.org/userdoc/prtracker

^ permalink raw reply	[flat|nested] 539+ messages in thread

* [GIT PULL] x86 fixes
@ 2019-04-12 13:10 Ingo Molnar
  2019-04-13  4:05 ` pr-tracker-bot
  0 siblings, 1 reply; 539+ messages in thread
From: Ingo Molnar @ 2019-04-12 13:10 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Thomas Gleixner, Borislav Petkov, Peter Zijlstra,
	Andrew Morton

Linus,

Please pull the latest x86-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus

   # HEAD: 5b77e95dd7790ff6c8fbf1cd8d0104ebed818a03 x86/asm: Use stricter assembly constraints in bitops

Fix typos in user-visible resctrl parameters, and also fix assembly 
constraint bugs that might result in miscompilation.

 Thanks,

	Ingo

------------------>
Alexander Potapenko (1):
      x86/asm: Use stricter assembly constraints in bitops

Xiaochen Shen (1):
      x86/resctrl: Fix typos in the mba_sc mount option


 arch/x86/include/asm/bitops.h          | 41 +++++++++++++++-------------------
 arch/x86/kernel/cpu/resctrl/rdtgroup.c |  6 ++---
 2 files changed, 21 insertions(+), 26 deletions(-)

diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h
index d153d570bb04..8e790ec219a5 100644
--- a/arch/x86/include/asm/bitops.h
+++ b/arch/x86/include/asm/bitops.h
@@ -36,16 +36,17 @@
  * bit 0 is the LSB of addr; bit 32 is the LSB of (addr+1).
  */
 
-#define BITOP_ADDR(x) "+m" (*(volatile long *) (x))
+#define RLONG_ADDR(x)			 "m" (*(volatile long *) (x))
+#define WBYTE_ADDR(x)			"+m" (*(volatile char *) (x))
 
-#define ADDR				BITOP_ADDR(addr)
+#define ADDR				RLONG_ADDR(addr)
 
 /*
  * We do the locked ops that don't return the old value as
  * a mask operation on a byte.
  */
 #define IS_IMMEDIATE(nr)		(__builtin_constant_p(nr))
-#define CONST_MASK_ADDR(nr, addr)	BITOP_ADDR((void *)(addr) + ((nr)>>3))
+#define CONST_MASK_ADDR(nr, addr)	WBYTE_ADDR((void *)(addr) + ((nr)>>3))
 #define CONST_MASK(nr)			(1 << ((nr) & 7))
 
 /**
@@ -73,7 +74,7 @@ set_bit(long nr, volatile unsigned long *addr)
 			: "memory");
 	} else {
 		asm volatile(LOCK_PREFIX __ASM_SIZE(bts) " %1,%0"
-			: BITOP_ADDR(addr) : "Ir" (nr) : "memory");
+			: : RLONG_ADDR(addr), "Ir" (nr) : "memory");
 	}
 }
 
@@ -88,7 +89,7 @@ set_bit(long nr, volatile unsigned long *addr)
  */
 static __always_inline void __set_bit(long nr, volatile unsigned long *addr)
 {
-	asm volatile(__ASM_SIZE(bts) " %1,%0" : ADDR : "Ir" (nr) : "memory");
+	asm volatile(__ASM_SIZE(bts) " %1,%0" : : ADDR, "Ir" (nr) : "memory");
 }
 
 /**
@@ -110,8 +111,7 @@ clear_bit(long nr, volatile unsigned long *addr)
 			: "iq" ((u8)~CONST_MASK(nr)));
 	} else {
 		asm volatile(LOCK_PREFIX __ASM_SIZE(btr) " %1,%0"
-			: BITOP_ADDR(addr)
-			: "Ir" (nr));
+			: : RLONG_ADDR(addr), "Ir" (nr) : "memory");
 	}
 }
 
@@ -131,7 +131,7 @@ static __always_inline void clear_bit_unlock(long nr, volatile unsigned long *ad
 
 static __always_inline void __clear_bit(long nr, volatile unsigned long *addr)
 {
-	asm volatile(__ASM_SIZE(btr) " %1,%0" : ADDR : "Ir" (nr));
+	asm volatile(__ASM_SIZE(btr) " %1,%0" : : ADDR, "Ir" (nr) : "memory");
 }
 
 static __always_inline bool clear_bit_unlock_is_negative_byte(long nr, volatile unsigned long *addr)
@@ -139,7 +139,7 @@ static __always_inline bool clear_bit_unlock_is_negative_byte(long nr, volatile
 	bool negative;
 	asm volatile(LOCK_PREFIX "andb %2,%1"
 		CC_SET(s)
-		: CC_OUT(s) (negative), ADDR
+		: CC_OUT(s) (negative), WBYTE_ADDR(addr)
 		: "ir" ((char) ~(1 << nr)) : "memory");
 	return negative;
 }
@@ -155,13 +155,9 @@ static __always_inline bool clear_bit_unlock_is_negative_byte(long nr, volatile
  * __clear_bit() is non-atomic and implies release semantics before the memory
  * operation. It can be used for an unlock if no other CPUs can concurrently
  * modify other bits in the word.
- *
- * No memory barrier is required here, because x86 cannot reorder stores past
- * older loads. Same principle as spin_unlock.
  */
 static __always_inline void __clear_bit_unlock(long nr, volatile unsigned long *addr)
 {
-	barrier();
 	__clear_bit(nr, addr);
 }
 
@@ -176,7 +172,7 @@ static __always_inline void __clear_bit_unlock(long nr, volatile unsigned long *
  */
 static __always_inline void __change_bit(long nr, volatile unsigned long *addr)
 {
-	asm volatile(__ASM_SIZE(btc) " %1,%0" : ADDR : "Ir" (nr));
+	asm volatile(__ASM_SIZE(btc) " %1,%0" : : ADDR, "Ir" (nr) : "memory");
 }
 
 /**
@@ -196,8 +192,7 @@ static __always_inline void change_bit(long nr, volatile unsigned long *addr)
 			: "iq" ((u8)CONST_MASK(nr)));
 	} else {
 		asm volatile(LOCK_PREFIX __ASM_SIZE(btc) " %1,%0"
-			: BITOP_ADDR(addr)
-			: "Ir" (nr));
+			: : RLONG_ADDR(addr), "Ir" (nr) : "memory");
 	}
 }
 
@@ -242,8 +237,8 @@ static __always_inline bool __test_and_set_bit(long nr, volatile unsigned long *
 
 	asm(__ASM_SIZE(bts) " %2,%1"
 	    CC_SET(c)
-	    : CC_OUT(c) (oldbit), ADDR
-	    : "Ir" (nr));
+	    : CC_OUT(c) (oldbit)
+	    : ADDR, "Ir" (nr) : "memory");
 	return oldbit;
 }
 
@@ -282,8 +277,8 @@ static __always_inline bool __test_and_clear_bit(long nr, volatile unsigned long
 
 	asm volatile(__ASM_SIZE(btr) " %2,%1"
 		     CC_SET(c)
-		     : CC_OUT(c) (oldbit), ADDR
-		     : "Ir" (nr));
+		     : CC_OUT(c) (oldbit)
+		     : ADDR, "Ir" (nr) : "memory");
 	return oldbit;
 }
 
@@ -294,8 +289,8 @@ static __always_inline bool __test_and_change_bit(long nr, volatile unsigned lon
 
 	asm volatile(__ASM_SIZE(btc) " %2,%1"
 		     CC_SET(c)
-		     : CC_OUT(c) (oldbit), ADDR
-		     : "Ir" (nr) : "memory");
+		     : CC_OUT(c) (oldbit)
+		     : ADDR, "Ir" (nr) : "memory");
 
 	return oldbit;
 }
@@ -326,7 +321,7 @@ static __always_inline bool variable_test_bit(long nr, volatile const unsigned l
 	asm volatile(__ASM_SIZE(bt) " %2,%1"
 		     CC_SET(c)
 		     : CC_OUT(c) (oldbit)
-		     : "m" (*(unsigned long *)addr), "Ir" (nr));
+		     : "m" (*(unsigned long *)addr), "Ir" (nr) : "memory");
 
 	return oldbit;
 }
diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
index 399601eda8e4..54b9eef3eea9 100644
--- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
+++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
@@ -2039,14 +2039,14 @@ static int rdt_get_tree(struct fs_context *fc)
 enum rdt_param {
 	Opt_cdp,
 	Opt_cdpl2,
-	Opt_mba_mpbs,
+	Opt_mba_mbps,
 	nr__rdt_params
 };
 
 static const struct fs_parameter_spec rdt_param_specs[] = {
 	fsparam_flag("cdp",		Opt_cdp),
 	fsparam_flag("cdpl2",		Opt_cdpl2),
-	fsparam_flag("mba_mpbs",	Opt_mba_mpbs),
+	fsparam_flag("mba_MBps",	Opt_mba_mbps),
 	{}
 };
 
@@ -2072,7 +2072,7 @@ static int rdt_parse_param(struct fs_context *fc, struct fs_parameter *param)
 	case Opt_cdpl2:
 		ctx->enable_cdpl2 = true;
 		return 0;
-	case Opt_mba_mpbs:
+	case Opt_mba_mbps:
 		if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
 			return -EINVAL;
 		ctx->enable_mba_mbps = true;

^ permalink raw reply	[flat|nested] 539+ messages in thread

* Re: [GIT PULL] x86 fixes
  2019-02-17 10:19 Ingo Molnar
@ 2019-02-17 16:50 ` pr-tracker-bot
  0 siblings, 0 replies; 539+ messages in thread
From: pr-tracker-bot @ 2019-02-17 16:50 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Linus Torvalds, linux-kernel, Thomas Gleixner, Borislav Petkov,
	Peter Zijlstra, Andrew Morton

The pull request you sent on Sun, 17 Feb 2019 11:19:08 +0100:

> git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus

has been merged into torvalds/linux.git:
https://git.kernel.org/torvalds/c/8d33316d520501f24fef180ea5b860ecb9e64506

Thank you!

-- 
Deet-doot-dot, I am a bot.
https://korg.wiki.kernel.org/userdoc/prtracker

^ permalink raw reply	[flat|nested] 539+ messages in thread

* [GIT PULL] x86 fixes
@ 2019-02-17 10:19 Ingo Molnar
  2019-02-17 16:50 ` pr-tracker-bot
  0 siblings, 1 reply; 539+ messages in thread
From: Ingo Molnar @ 2019-02-17 10:19 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Thomas Gleixner, Borislav Petkov, Peter Zijlstra,
	Andrew Morton

Linus,

Please pull the latest x86-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus

   # HEAD: f331e766c4be33f4338574f3c9f7f77e98ab4571 x86/platform/UV: Use efi_runtime_lock to serialise BIOS calls

Three changes:

 - An UV fix/quirk to pull UV BIOS calls into the efi_runtime_lock 
   locking regime. (This done by aliasing __efi_uv_runtime_lock to
   efi_runtime_lock, which should make the quirk nature obvious and
   maintain the general policy that the EFI lock (name...) isn't exposed
   to drivers.)

 - Our version of MAGA: Make a.out Great Again.

 - Add a new Intel model name enumerator to an upstream header to help 
   reduce dependencies going forward.


  out-of-topic modifications in x86-urgent-for-linus:
  -----------------------------------------------------
  drivers/firmware/efi/runtime-wrappers.c# f331e766c4be: x86/platform/UV: Use efi_run

 Thanks,

	Ingo

------------------>
Borislav Petkov (1):
      x86/a.out: Clear the dump structure initially

Hedi Berriche (1):
      x86/platform/UV: Use efi_runtime_lock to serialise BIOS calls

Rajneesh Bhardwaj (1):
      x86/CPU: Add Icelake model number


 arch/x86/ia32/ia32_aout.c               |  6 ++++--
 arch/x86/include/asm/intel-family.h     |  2 ++
 arch/x86/include/asm/uv/bios.h          |  8 +++++++-
 arch/x86/platform/uv/bios_uv.c          | 23 +++++++++++++++++++++--
 drivers/firmware/efi/runtime-wrappers.c |  7 +++++++
 5 files changed, 41 insertions(+), 5 deletions(-)

diff --git a/arch/x86/ia32/ia32_aout.c b/arch/x86/ia32/ia32_aout.c
index f65b78d32f5e..7dbbe9ffda17 100644
--- a/arch/x86/ia32/ia32_aout.c
+++ b/arch/x86/ia32/ia32_aout.c
@@ -51,7 +51,7 @@ static unsigned long get_dr(int n)
 /*
  * fill in the user structure for a core dump..
  */
-static void dump_thread32(struct pt_regs *regs, struct user32 *dump)
+static void fill_dump(struct pt_regs *regs, struct user32 *dump)
 {
 	u32 fs, gs;
 	memset(dump, 0, sizeof(*dump));
@@ -157,10 +157,12 @@ static int aout_core_dump(struct coredump_params *cprm)
 	fs = get_fs();
 	set_fs(KERNEL_DS);
 	has_dumped = 1;
+
+	fill_dump(cprm->regs, &dump);
+
 	strncpy(dump.u_comm, current->comm, sizeof(current->comm));
 	dump.u_ar0 = offsetof(struct user32, regs);
 	dump.signal = cprm->siginfo->si_signo;
-	dump_thread32(cprm->regs, &dump);
 
 	/*
 	 * If the size of the dump file exceeds the rlimit, then see
diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h
index d9a9993af882..9f15384c504a 100644
--- a/arch/x86/include/asm/intel-family.h
+++ b/arch/x86/include/asm/intel-family.h
@@ -52,6 +52,8 @@
 
 #define INTEL_FAM6_CANNONLAKE_MOBILE	0x66
 
+#define INTEL_FAM6_ICELAKE_MOBILE	0x7E
+
 /* "Small Core" Processors (Atom) */
 
 #define INTEL_FAM6_ATOM_BONNELL		0x1C /* Diamondville, Pineview */
diff --git a/arch/x86/include/asm/uv/bios.h b/arch/x86/include/asm/uv/bios.h
index e652a7cc6186..3f697a9e3f59 100644
--- a/arch/x86/include/asm/uv/bios.h
+++ b/arch/x86/include/asm/uv/bios.h
@@ -48,7 +48,8 @@ enum {
 	BIOS_STATUS_SUCCESS		=  0,
 	BIOS_STATUS_UNIMPLEMENTED	= -ENOSYS,
 	BIOS_STATUS_EINVAL		= -EINVAL,
-	BIOS_STATUS_UNAVAIL		= -EBUSY
+	BIOS_STATUS_UNAVAIL		= -EBUSY,
+	BIOS_STATUS_ABORT		= -EINTR,
 };
 
 /* Address map parameters */
@@ -167,4 +168,9 @@ extern long system_serial_number;
 
 extern struct kobject *sgi_uv_kobj;	/* /sys/firmware/sgi_uv */
 
+/*
+ * EFI runtime lock; cf. firmware/efi/runtime-wrappers.c for details
+ */
+extern struct semaphore __efi_uv_runtime_lock;
+
 #endif /* _ASM_X86_UV_BIOS_H */
diff --git a/arch/x86/platform/uv/bios_uv.c b/arch/x86/platform/uv/bios_uv.c
index 4a6a5a26c582..eb33432f2f24 100644
--- a/arch/x86/platform/uv/bios_uv.c
+++ b/arch/x86/platform/uv/bios_uv.c
@@ -29,7 +29,8 @@
 
 struct uv_systab *uv_systab;
 
-s64 uv_bios_call(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, u64 a4, u64 a5)
+static s64 __uv_bios_call(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3,
+			u64 a4, u64 a5)
 {
 	struct uv_systab *tab = uv_systab;
 	s64 ret;
@@ -51,6 +52,19 @@ s64 uv_bios_call(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, u64 a4, u64 a5)
 
 	return ret;
 }
+
+s64 uv_bios_call(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, u64 a4, u64 a5)
+{
+	s64 ret;
+
+	if (down_interruptible(&__efi_uv_runtime_lock))
+		return BIOS_STATUS_ABORT;
+
+	ret = __uv_bios_call(which, a1, a2, a3, a4, a5);
+	up(&__efi_uv_runtime_lock);
+
+	return ret;
+}
 EXPORT_SYMBOL_GPL(uv_bios_call);
 
 s64 uv_bios_call_irqsave(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3,
@@ -59,10 +73,15 @@ s64 uv_bios_call_irqsave(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3,
 	unsigned long bios_flags;
 	s64 ret;
 
+	if (down_interruptible(&__efi_uv_runtime_lock))
+		return BIOS_STATUS_ABORT;
+
 	local_irq_save(bios_flags);
-	ret = uv_bios_call(which, a1, a2, a3, a4, a5);
+	ret = __uv_bios_call(which, a1, a2, a3, a4, a5);
 	local_irq_restore(bios_flags);
 
+	up(&__efi_uv_runtime_lock);
+
 	return ret;
 }
 
diff --git a/drivers/firmware/efi/runtime-wrappers.c b/drivers/firmware/efi/runtime-wrappers.c
index 8903b9ccfc2b..e2abfdb5cee6 100644
--- a/drivers/firmware/efi/runtime-wrappers.c
+++ b/drivers/firmware/efi/runtime-wrappers.c
@@ -146,6 +146,13 @@ void efi_call_virt_check_flags(unsigned long flags, const char *call)
  */
 static DEFINE_SEMAPHORE(efi_runtime_lock);
 
+/*
+ * Expose the EFI runtime lock to the UV platform
+ */
+#ifdef CONFIG_X86_UV
+extern struct semaphore __efi_uv_runtime_lock __alias(efi_runtime_lock);
+#endif
+
 /*
  * Calls the appropriate efi_runtime_service() with the appropriate
  * arguments.

^ permalink raw reply	[flat|nested] 539+ messages in thread

* Re: [GIT PULL] x86 fixes
  2019-02-10  9:13 Ingo Molnar
@ 2019-02-10 18:30 ` pr-tracker-bot
  0 siblings, 0 replies; 539+ messages in thread
From: pr-tracker-bot @ 2019-02-10 18:30 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Linus Torvalds, linux-kernel, Thomas Gleixner, Borislav Petkov,
	Peter Zijlstra, Andrew Morton

The pull request you sent on Sun, 10 Feb 2019 10:13:40 +0100:

> git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus

has been merged into torvalds/linux.git:
https://git.kernel.org/torvalds/c/aadaa8061189a9e5d8a1327b328453d663e8cbc9

Thank you!

-- 
Deet-doot-dot, I am a bot.
https://korg.wiki.kernel.org/userdoc/prtracker

^ permalink raw reply	[flat|nested] 539+ messages in thread

* [GIT PULL] x86 fixes
@ 2019-02-10  9:13 Ingo Molnar
  2019-02-10 18:30 ` pr-tracker-bot
  0 siblings, 1 reply; 539+ messages in thread
From: Ingo Molnar @ 2019-02-10  9:13 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Thomas Gleixner, Borislav Petkov, Peter Zijlstra,
	Andrew Morton

Linus,

Please pull the latest x86-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus

   # HEAD: 20e55bc17dd01f13cec0eb17e76e9511b23963ef x86/mm: Make set_pmd_at() paravirt aware

A handful of fixes:

 - Fix an MCE corner case bug/crash found via MCE injection testing

 - Fix 5-level paging boot crash

 - Fix MCE recovery cache invalidation bug

 - Fix regression on Xen guests caused by a recent PMD level mremap 
   speedup optimization

 Thanks,

	Ingo

------------------>
Juergen Gross (1):
      x86/mm: Make set_pmd_at() paravirt aware

Kirill A. Shutemov (1):
      x86/boot/compressed/64: Do not corrupt EDX on EFER.LME=1 setting

Peter Zijlstra (1):
      x86/mm/cpa: Fix set_mce_nospec()

Tony Luck (1):
      x86/MCE: Initialize mce.bank in the case of a fatal error in mce_no_way_out()


 arch/x86/boot/compressed/head_64.S |  2 ++
 arch/x86/include/asm/pgtable.h     |  2 +-
 arch/x86/kernel/cpu/mce/core.c     |  1 +
 arch/x86/mm/pageattr.c             | 50 +++++++++++++++++++-------------------
 4 files changed, 29 insertions(+), 26 deletions(-)

diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index f105ae8651c9..f62e347862cc 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -602,10 +602,12 @@ ENTRY(trampoline_32bit_src)
 3:
 	/* Set EFER.LME=1 as a precaution in case hypervsior pulls the rug */
 	pushl	%ecx
+	pushl	%edx
 	movl	$MSR_EFER, %ecx
 	rdmsr
 	btsl	$_EFER_LME, %eax
 	wrmsr
+	popl	%edx
 	popl	%ecx
 
 	/* Enable PAE and LA57 (if required) paging modes */
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 40616e805292..2779ace16d23 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -1065,7 +1065,7 @@ static inline void native_set_pte_at(struct mm_struct *mm, unsigned long addr,
 static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
 			      pmd_t *pmdp, pmd_t pmd)
 {
-	native_set_pmd(pmdp, pmd);
+	set_pmd(pmdp, pmd);
 }
 
 static inline void set_pud_at(struct mm_struct *mm, unsigned long addr,
diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index 672c7225cb1b..6ce290c506d9 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -784,6 +784,7 @@ static int mce_no_way_out(struct mce *m, char **msg, unsigned long *validp,
 			quirk_no_way_out(i, m, regs);
 
 		if (mce_severity(m, mca_cfg.tolerant, &tmp, true) >= MCE_PANIC_SEVERITY) {
+			m->bank = i;
 			mce_read_aux(m, i);
 			*msg = tmp;
 			return 1;
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 4f8972311a77..14e6119838a6 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -230,6 +230,29 @@ static bool __cpa_pfn_in_highmap(unsigned long pfn)
 
 #endif
 
+/*
+ * See set_mce_nospec().
+ *
+ * Machine check recovery code needs to change cache mode of poisoned pages to
+ * UC to avoid speculative access logging another error. But passing the
+ * address of the 1:1 mapping to set_memory_uc() is a fine way to encourage a
+ * speculative access. So we cheat and flip the top bit of the address. This
+ * works fine for the code that updates the page tables. But at the end of the
+ * process we need to flush the TLB and cache and the non-canonical address
+ * causes a #GP fault when used by the INVLPG and CLFLUSH instructions.
+ *
+ * But in the common case we already have a canonical address. This code
+ * will fix the top bit if needed and is a no-op otherwise.
+ */
+static inline unsigned long fix_addr(unsigned long addr)
+{
+#ifdef CONFIG_X86_64
+	return (long)(addr << 1) >> 1;
+#else
+	return addr;
+#endif
+}
+
 static unsigned long __cpa_addr(struct cpa_data *cpa, unsigned long idx)
 {
 	if (cpa->flags & CPA_PAGES_ARRAY) {
@@ -313,7 +336,7 @@ void __cpa_flush_tlb(void *data)
 	unsigned int i;
 
 	for (i = 0; i < cpa->numpages; i++)
-		__flush_tlb_one_kernel(__cpa_addr(cpa, i));
+		__flush_tlb_one_kernel(fix_addr(__cpa_addr(cpa, i)));
 }
 
 static void cpa_flush(struct cpa_data *data, int cache)
@@ -347,7 +370,7 @@ static void cpa_flush(struct cpa_data *data, int cache)
 		 * Only flush present addresses:
 		 */
 		if (pte && (pte_val(*pte) & _PAGE_PRESENT))
-			clflush_cache_range_opt((void *)addr, PAGE_SIZE);
+			clflush_cache_range_opt((void *)fix_addr(addr), PAGE_SIZE);
 	}
 	mb();
 }
@@ -1627,29 +1650,6 @@ static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias)
 	return ret;
 }
 
-/*
- * Machine check recovery code needs to change cache mode of poisoned
- * pages to UC to avoid speculative access logging another error. But
- * passing the address of the 1:1 mapping to set_memory_uc() is a fine
- * way to encourage a speculative access. So we cheat and flip the top
- * bit of the address. This works fine for the code that updates the
- * page tables. But at the end of the process we need to flush the cache
- * and the non-canonical address causes a #GP fault when used by the
- * CLFLUSH instruction.
- *
- * But in the common case we already have a canonical address. This code
- * will fix the top bit if needed and is a no-op otherwise.
- */
-static inline unsigned long make_addr_canonical_again(unsigned long addr)
-{
-#ifdef CONFIG_X86_64
-	return (long)(addr << 1) >> 1;
-#else
-	return addr;
-#endif
-}
-
-
 static int change_page_attr_set_clr(unsigned long *addr, int numpages,
 				    pgprot_t mask_set, pgprot_t mask_clr,
 				    int force_split, int in_flag,

^ permalink raw reply	[flat|nested] 539+ messages in thread

* Re: [GIT PULL] x86 fixes
  2019-01-11  7:14 Ingo Molnar
@ 2019-01-11 18:00 ` pr-tracker-bot
  0 siblings, 0 replies; 539+ messages in thread
From: pr-tracker-bot @ 2019-01-11 18:00 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Linus Torvalds, linux-kernel, Thomas Gleixner, Borislav Petkov,
	Peter Zijlstra, Andrew Morton

The pull request you sent on Fri, 11 Jan 2019 08:14:54 +0100:

> git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus

has been merged into torvalds/linux.git:
https://git.kernel.org/torvalds/c/e8af37f3f488e7adce2b5c6f6dfe8c83c2662e1f

Thank you!

-- 
Deet-doot-dot, I am a bot.
https://korg.wiki.kernel.org/userdoc/prtracker

^ permalink raw reply	[flat|nested] 539+ messages in thread

* [GIT PULL] x86 fixes
@ 2019-01-11  7:14 Ingo Molnar
  2019-01-11 18:00 ` pr-tracker-bot
  0 siblings, 1 reply; 539+ messages in thread
From: Ingo Molnar @ 2019-01-11  7:14 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Thomas Gleixner, Borislav Petkov, Peter Zijlstra,
	Andrew Morton

Linus,

Please pull the latest x86-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus

   # HEAD: e4f358916d528d479c3c12bd2fd03f2d5a576380 x86, modpost: Replace last remnants of RETPOLINE with CONFIG_RETPOLINE

A 32-bit build fix, CONFIG_RETPOLINE fixes and rename CONFIG_RESCTRL to 
CONFIG_X86_RESCTRL.

  out-of-topic modifications in x86-urgent-for-linus:
  -----------------------------------------------------
  include/linux/module.h             # e4f358916d52: x86, modpost: Replace last r
  samples/seccomp/Makefile           # a77d1d196bc6: samples/seccomp: Fix 32-bit 
  scripts/mod/modpost.c              # e4f358916d52: x86, modpost: Replace last r

 Thanks,

	Ingo

------------------>
Borislav Petkov (1):
      x86/cache: Rename config option to CONFIG_X86_RESCTRL

Tycho Andersen (1):
      samples/seccomp: Fix 32-bit build

WANG Chao (1):
      x86, modpost: Replace last remnants of RETPOLINE with CONFIG_RETPOLINE


 Documentation/x86/resctrl_ui.txt     | 2 +-
 arch/x86/Kconfig                     | 2 +-
 arch/x86/include/asm/resctrl_sched.h | 4 ++--
 arch/x86/kernel/cpu/Makefile         | 2 +-
 arch/x86/kernel/cpu/bugs.c           | 2 +-
 arch/x86/kernel/cpu/resctrl/Makefile | 4 ++--
 include/linux/compiler-gcc.h         | 2 +-
 include/linux/module.h               | 2 +-
 include/linux/sched.h                | 2 +-
 samples/seccomp/Makefile             | 1 +
 scripts/mod/modpost.c                | 2 +-
 11 files changed, 13 insertions(+), 12 deletions(-)

diff --git a/Documentation/x86/resctrl_ui.txt b/Documentation/x86/resctrl_ui.txt
index d9aed8303984..e8e8d14d3c4e 100644
--- a/Documentation/x86/resctrl_ui.txt
+++ b/Documentation/x86/resctrl_ui.txt
@@ -9,7 +9,7 @@ Fenghua Yu <fenghua.yu@intel.com>
 Tony Luck <tony.luck@intel.com>
 Vikas Shivappa <vikas.shivappa@intel.com>
 
-This feature is enabled by the CONFIG_RESCTRL and the X86 /proc/cpuinfo
+This feature is enabled by the CONFIG_X86_RESCTRL and the x86 /proc/cpuinfo
 flag bits:
 RDT (Resource Director Technology) Allocation - "rdt_a"
 CAT (Cache Allocation Technology) - "cat_l3", "cat_l2"
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 6185d4f33296..15af091611e2 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -446,7 +446,7 @@ config RETPOLINE
 	  branches. Requires a compiler with -mindirect-branch=thunk-extern
 	  support for full protection. The kernel may run slower.
 
-config RESCTRL
+config X86_RESCTRL
 	bool "Resource Control support"
 	depends on X86 && (CPU_SUP_INTEL || CPU_SUP_AMD)
 	select KERNFS
diff --git a/arch/x86/include/asm/resctrl_sched.h b/arch/x86/include/asm/resctrl_sched.h
index 54990fe2a3ae..40ebddde6ac2 100644
--- a/arch/x86/include/asm/resctrl_sched.h
+++ b/arch/x86/include/asm/resctrl_sched.h
@@ -2,7 +2,7 @@
 #ifndef _ASM_X86_RESCTRL_SCHED_H
 #define _ASM_X86_RESCTRL_SCHED_H
 
-#ifdef CONFIG_RESCTRL
+#ifdef CONFIG_X86_RESCTRL
 
 #include <linux/sched.h>
 #include <linux/jump_label.h>
@@ -88,6 +88,6 @@ static inline void resctrl_sched_in(void)
 
 static inline void resctrl_sched_in(void) {}
 
-#endif /* CONFIG_RESCTRL */
+#endif /* CONFIG_X86_RESCTRL */
 
 #endif /* _ASM_X86_RESCTRL_SCHED_H */
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index ac78f90aea56..b6fa0869f7aa 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -39,7 +39,7 @@ obj-$(CONFIG_CPU_SUP_UMC_32)		+= umc.o
 obj-$(CONFIG_X86_MCE)			+= mce/
 obj-$(CONFIG_MTRR)			+= mtrr/
 obj-$(CONFIG_MICROCODE)			+= microcode/
-obj-$(CONFIG_RESCTRL)			+= resctrl/
+obj-$(CONFIG_X86_RESCTRL)		+= resctrl/
 
 obj-$(CONFIG_X86_LOCAL_APIC)		+= perfctr-watchdog.o
 
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 8654b8b0c848..1de0f4170178 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -215,7 +215,7 @@ static enum spectre_v2_mitigation spectre_v2_enabled __ro_after_init =
 static enum spectre_v2_user_mitigation spectre_v2_user __ro_after_init =
 	SPECTRE_V2_USER_NONE;
 
-#ifdef RETPOLINE
+#ifdef CONFIG_RETPOLINE
 static bool spectre_v2_bad_module;
 
 bool retpoline_module_ok(bool has_retpoline)
diff --git a/arch/x86/kernel/cpu/resctrl/Makefile b/arch/x86/kernel/cpu/resctrl/Makefile
index 6895049ceef7..1cabe6fd8e11 100644
--- a/arch/x86/kernel/cpu/resctrl/Makefile
+++ b/arch/x86/kernel/cpu/resctrl/Makefile
@@ -1,4 +1,4 @@
 # SPDX-License-Identifier: GPL-2.0
-obj-$(CONFIG_RESCTRL)	+= core.o rdtgroup.o monitor.o
-obj-$(CONFIG_RESCTRL)	+= ctrlmondata.o pseudo_lock.o
+obj-$(CONFIG_X86_RESCTRL)	+= core.o rdtgroup.o monitor.o
+obj-$(CONFIG_X86_RESCTRL)	+= ctrlmondata.o pseudo_lock.o
 CFLAGS_pseudo_lock.o = -I$(src)
diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
index 5776da43da97..dd8268f5f5f0 100644
--- a/include/linux/compiler-gcc.h
+++ b/include/linux/compiler-gcc.h
@@ -68,7 +68,7 @@
  */
 #define uninitialized_var(x) x = x
 
-#ifdef RETPOLINE
+#ifdef CONFIG_RETPOLINE
 #define __noretpoline __attribute__((__indirect_branch__("keep")))
 #endif
 
diff --git a/include/linux/module.h b/include/linux/module.h
index 9a21fe3509af..8fa38d3e7538 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -828,7 +828,7 @@ static inline void module_bug_finalize(const Elf_Ehdr *hdr,
 static inline void module_bug_cleanup(struct module *mod) {}
 #endif	/* CONFIG_GENERIC_BUG */
 
-#ifdef RETPOLINE
+#ifdef CONFIG_RETPOLINE
 extern bool retpoline_module_ok(bool has_retpoline);
 #else
 static inline bool retpoline_module_ok(bool has_retpoline)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 89541d248893..224666226e87 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -995,7 +995,7 @@ struct task_struct {
 	/* cg_list protected by css_set_lock and tsk->alloc_lock: */
 	struct list_head		cg_list;
 #endif
-#ifdef CONFIG_RESCTRL
+#ifdef CONFIG_X86_RESCTRL
 	u32				closid;
 	u32				rmid;
 #endif
diff --git a/samples/seccomp/Makefile b/samples/seccomp/Makefile
index 4920903c8009..fb43a814d4c0 100644
--- a/samples/seccomp/Makefile
+++ b/samples/seccomp/Makefile
@@ -34,6 +34,7 @@ HOSTCFLAGS_bpf-direct.o += $(MFLAG)
 HOSTCFLAGS_dropper.o += $(MFLAG)
 HOSTCFLAGS_bpf-helper.o += $(MFLAG)
 HOSTCFLAGS_bpf-fancy.o += $(MFLAG)
+HOSTCFLAGS_user-trap.o += $(MFLAG)
 HOSTLDLIBS_bpf-direct += $(MFLAG)
 HOSTLDLIBS_bpf-fancy += $(MFLAG)
 HOSTLDLIBS_dropper += $(MFLAG)
diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c
index 0de2fb236640..26bf886bd168 100644
--- a/scripts/mod/modpost.c
+++ b/scripts/mod/modpost.c
@@ -2185,7 +2185,7 @@ static void add_intree_flag(struct buffer *b, int is_intree)
 /* Cannot check for assembler */
 static void add_retpoline(struct buffer *b)
 {
-	buf_printf(b, "\n#ifdef RETPOLINE\n");
+	buf_printf(b, "\n#ifdef CONFIG_RETPOLINE\n");
 	buf_printf(b, "MODULE_INFO(retpoline, \"Y\");\n");
 	buf_printf(b, "#endif\n");
 }

^ permalink raw reply	[flat|nested] 539+ messages in thread

* Re: [GIT PULL] x86 fixes
  2018-12-21 12:25 Ingo Molnar
@ 2018-12-21 19:30 ` pr-tracker-bot
  0 siblings, 0 replies; 539+ messages in thread
From: pr-tracker-bot @ 2018-12-21 19:30 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Linus Torvalds, linux-kernel, Thomas Gleixner, Borislav Petkov,
	Peter Zijlstra, Andrew Morton, Masahiro Yamada

The pull request you sent on Fri, 21 Dec 2018 13:25:45 +0100:

> git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus

has been merged into torvalds/linux.git:
https://git.kernel.org/torvalds/c/70ad6368e878857db315788dab36817aa992c86a

Thank you!

-- 
Deet-doot-dot, I am a bot.
https://korg.wiki.kernel.org/userdoc/prtracker

^ permalink raw reply	[flat|nested] 539+ messages in thread

* [GIT PULL] x86 fixes
@ 2018-12-21 12:25 Ingo Molnar
  2018-12-21 19:30 ` pr-tracker-bot
  0 siblings, 1 reply; 539+ messages in thread
From: Ingo Molnar @ 2018-12-21 12:25 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Thomas Gleixner, Borislav Petkov, Peter Zijlstra,
	Andrew Morton, Masahiro Yamada

Linus,

Please pull the latest x86-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus

   # HEAD: 6ac389346e6964e1f6a1c675cebf8bd0912526a5 Revert "kbuild/Makefile: Prepare for using macros in inline assembly code to work around asm() related GCC inlining bugs"

The biggest part is a series of reverts for the macro based GCC inlining 
workarounds. It caused regressions in distro build and other kernel 
tooling environments, and the GCC project was very receptive to fixing 
the underlying inliner weaknesses - so as time ran out we decided to do a 
reasonably straightforward revert of the patches. The plan is to rely on 
the 'asm inline' GCC 9 feature, which might be backported to GCC 8 and 
could thus become reasonably widely available on modern distros.

Other than those reverts, there's misc fixes from all around the place.

I wish our final x86 pull request for v4.20 was smaller...

  out-of-topic modifications in x86-urgent-for-linus:
  -----------------------------------------------------
  Makefile                           # 6ac389346e69: Revert "kbuild/Makefile: Pre
  include/asm-generic/bug.h          # ffb61c6346d0: Revert "x86/bug: Macrofy the
  include/linux/compiler.h           # 96af6cd02a10: Revert "x86/objtool: Use asm
  scripts/Kbuild.include             # 6ac389346e69: Revert "kbuild/Makefile: Pre
  scripts/mod/Makefile               # 6ac389346e69: Revert "kbuild/Makefile: Pre

 Thanks,

	Ingo

------------------>
Alistair Strachan (1):
      x86/vdso: Pass --eh-frame-hdr to the linker

Chang S. Bae (1):
      x86/fsgsbase/64: Fix the base write helper functions

Colin Ian King (1):
      x86/mtrr: Don't copy uninitialized gentry fields back to userspace

Dan Williams (1):
      x86/mm: Fix decoy address handling vs 32-bit builds

Ingo Molnar (9):
      Revert "x86/jump-labels: Macrofy inline assembly code to work around GCC inlining bugs"
      Revert "x86/cpufeature: Macrofy inline assembly code to work around GCC inlining bugs"
      Revert "x86/extable: Macrofy inline assembly code to work around GCC inlining bugs"
      Revert "x86/paravirt: Work around GCC inlining bugs when compiling paravirt ops"
      Revert "x86/bug: Macrofy the BUG table section handling, to work around GCC inlining bugs"
      Revert "x86/alternatives: Macrofy lock prefixes to work around GCC inlining bugs"
      Revert "x86/refcount: Work around GCC inlining bug"
      Revert "x86/objtool: Use asm macros to work around GCC inlining bugs"
      Revert "kbuild/Makefile: Prepare for using macros in inline assembly code to work around asm() related GCC inlining bugs"

Kirill A. Shutemov (2):
      x86/mm: Fix guard hole handling
      x86/dump_pagetables: Fix LDT remap address marker

Peter Zijlstra (1):
      x86/mm/cpa: Fix cpa_flush_array() TLB invalidation

Reinette Chatre (1):
      x86/intel_rdt: Ensure a CPU remains online for the region's pseudo-locking sequence


 Makefile                                    |  9 +--
 arch/x86/Makefile                           |  7 --
 arch/x86/entry/calling.h                    |  2 +-
 arch/x86/entry/vdso/Makefile                |  3 +-
 arch/x86/include/asm/alternative-asm.h      | 20 ++----
 arch/x86/include/asm/alternative.h          | 11 +++-
 arch/x86/include/asm/asm.h                  | 53 +++++++++------
 arch/x86/include/asm/bug.h                  | 98 ++++++++++++----------------
 arch/x86/include/asm/cpufeature.h           | 82 ++++++++++--------------
 arch/x86/include/asm/fsgsbase.h             | 15 +++--
 arch/x86/include/asm/jump_label.h           | 72 +++++++++++++++------
 arch/x86/include/asm/paravirt_types.h       | 56 ++++++++--------
 arch/x86/include/asm/pgtable_64_types.h     |  5 ++
 arch/x86/include/asm/refcount.h             | 81 ++++++++++-------------
 arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c |  4 ++
 arch/x86/kernel/cpu/mtrr/if.c               |  2 +
 arch/x86/kernel/macros.S                    | 16 -----
 arch/x86/kernel/process_64.c                | 99 ++++++++++++++++-------------
 arch/x86/kernel/ptrace.c                    |  9 +--
 arch/x86/mm/dump_pagetables.c               | 15 ++---
 arch/x86/mm/pageattr.c                      | 24 ++++---
 arch/x86/mm/pat.c                           | 13 +++-
 arch/x86/xen/mmu_pv.c                       | 11 ++--
 include/asm-generic/bug.h                   |  8 +--
 include/linux/compiler.h                    | 56 ++++------------
 scripts/Kbuild.include                      |  4 +-
 scripts/mod/Makefile                        |  2 -
 27 files changed, 385 insertions(+), 392 deletions(-)
 delete mode 100644 arch/x86/kernel/macros.S

diff --git a/Makefile b/Makefile
index e9fd22c8445e..da4bb1e10388 100644
--- a/Makefile
+++ b/Makefile
@@ -1081,7 +1081,7 @@ scripts: scripts_basic scripts_dtc asm-generic gcc-plugins $(autoksyms_h)
 # version.h and scripts_basic is processed / created.
 
 # Listed in dependency order
-PHONY += prepare archprepare macroprepare prepare0 prepare1 prepare2 prepare3
+PHONY += prepare archprepare prepare0 prepare1 prepare2 prepare3
 
 # prepare3 is used to check if we are building in a separate output directory,
 # and if so do:
@@ -1104,9 +1104,7 @@ prepare2: prepare3 outputmakefile asm-generic
 prepare1: prepare2 $(version_h) $(autoksyms_h) include/generated/utsrelease.h
 	$(cmd_crmodverdir)
 
-macroprepare: prepare1 archmacros
-
-archprepare: archheaders archscripts macroprepare scripts_basic
+archprepare: archheaders archscripts prepare1 scripts_basic
 
 prepare0: archprepare gcc-plugins
 	$(Q)$(MAKE) $(build)=.
@@ -1174,9 +1172,6 @@ archheaders:
 PHONY += archscripts
 archscripts:
 
-PHONY += archmacros
-archmacros:
-
 PHONY += __headers
 __headers: $(version_h) scripts_basic uapi-asm-generic archheaders archscripts
 	$(Q)$(MAKE) $(build)=scripts build_unifdef
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 75ef499a66e2..85a66c4a8b65 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -232,13 +232,6 @@ archscripts: scripts_basic
 archheaders:
 	$(Q)$(MAKE) $(build)=arch/x86/entry/syscalls all
 
-archmacros:
-	$(Q)$(MAKE) $(build)=arch/x86/kernel arch/x86/kernel/macros.s
-
-ASM_MACRO_FLAGS = -Wa,arch/x86/kernel/macros.s
-export ASM_MACRO_FLAGS
-KBUILD_CFLAGS += $(ASM_MACRO_FLAGS)
-
 ###
 # Kernel objects
 
diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h
index 25e5a6bda8c3..20d0885b00fb 100644
--- a/arch/x86/entry/calling.h
+++ b/arch/x86/entry/calling.h
@@ -352,7 +352,7 @@ For 32-bit we have the following conventions - kernel is built with
 .macro CALL_enter_from_user_mode
 #ifdef CONFIG_CONTEXT_TRACKING
 #ifdef HAVE_JUMP_LABEL
-	STATIC_BRANCH_JMP l_yes=.Lafter_call_\@, key=context_tracking_enabled, branch=1
+	STATIC_JUMP_IF_FALSE .Lafter_call_\@, context_tracking_enabled, def=0
 #endif
 	call enter_from_user_mode
 .Lafter_call_\@:
diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile
index 0624bf2266fd..5bfe2243a08f 100644
--- a/arch/x86/entry/vdso/Makefile
+++ b/arch/x86/entry/vdso/Makefile
@@ -171,7 +171,8 @@ quiet_cmd_vdso = VDSO    $@
 		 sh $(srctree)/$(src)/checkundef.sh '$(NM)' '$@'
 
 VDSO_LDFLAGS = -shared $(call ld-option, --hash-style=both) \
-	$(call ld-option, --build-id) -Bsymbolic
+	$(call ld-option, --build-id) $(call ld-option, --eh-frame-hdr) \
+	-Bsymbolic
 GCOV_PROFILE := n
 
 #
diff --git a/arch/x86/include/asm/alternative-asm.h b/arch/x86/include/asm/alternative-asm.h
index 8e4ea39e55d0..31b627b43a8e 100644
--- a/arch/x86/include/asm/alternative-asm.h
+++ b/arch/x86/include/asm/alternative-asm.h
@@ -7,24 +7,16 @@
 #include <asm/asm.h>
 
 #ifdef CONFIG_SMP
-.macro LOCK_PREFIX_HERE
+	.macro LOCK_PREFIX
+672:	lock
 	.pushsection .smp_locks,"a"
 	.balign 4
-	.long 671f - .		# offset
+	.long 672b - .
 	.popsection
-671:
-.endm
-
-.macro LOCK_PREFIX insn:vararg
-	LOCK_PREFIX_HERE
-	lock \insn
-.endm
+	.endm
 #else
-.macro LOCK_PREFIX_HERE
-.endm
-
-.macro LOCK_PREFIX insn:vararg
-.endm
+	.macro LOCK_PREFIX
+	.endm
 #endif
 
 /*
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index d7faa16622d8..4cd6a3b71824 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -31,8 +31,15 @@
  */
 
 #ifdef CONFIG_SMP
-#define LOCK_PREFIX_HERE "LOCK_PREFIX_HERE\n\t"
-#define LOCK_PREFIX "LOCK_PREFIX "
+#define LOCK_PREFIX_HERE \
+		".pushsection .smp_locks,\"a\"\n"	\
+		".balign 4\n"				\
+		".long 671f - .\n" /* offset */		\
+		".popsection\n"				\
+		"671:"
+
+#define LOCK_PREFIX LOCK_PREFIX_HERE "\n\tlock; "
+
 #else /* ! CONFIG_SMP */
 #define LOCK_PREFIX_HERE ""
 #define LOCK_PREFIX ""
diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h
index 21b086786404..6467757bb39f 100644
--- a/arch/x86/include/asm/asm.h
+++ b/arch/x86/include/asm/asm.h
@@ -120,25 +120,12 @@
 /* Exception table entry */
 #ifdef __ASSEMBLY__
 # define _ASM_EXTABLE_HANDLE(from, to, handler)			\
-	ASM_EXTABLE_HANDLE from to handler
-
-.macro ASM_EXTABLE_HANDLE from:req to:req handler:req
-	.pushsection "__ex_table","a"
-	.balign 4
-	.long (\from) - .
-	.long (\to) - .
-	.long (\handler) - .
+	.pushsection "__ex_table","a" ;				\
+	.balign 4 ;						\
+	.long (from) - . ;					\
+	.long (to) - . ;					\
+	.long (handler) - . ;					\
 	.popsection
-.endm
-#else /* __ASSEMBLY__ */
-
-# define _ASM_EXTABLE_HANDLE(from, to, handler)			\
-	"ASM_EXTABLE_HANDLE from=" #from " to=" #to		\
-	" handler=\"" #handler "\"\n\t"
-
-/* For C file, we already have NOKPROBE_SYMBOL macro */
-
-#endif /* __ASSEMBLY__ */
 
 # define _ASM_EXTABLE(from, to)					\
 	_ASM_EXTABLE_HANDLE(from, to, ex_handler_default)
@@ -161,7 +148,6 @@
 	_ASM_PTR (entry);					\
 	.popsection
 
-#ifdef __ASSEMBLY__
 .macro ALIGN_DESTINATION
 	/* check for bad alignment of destination */
 	movl %edi,%ecx
@@ -185,7 +171,34 @@
 	_ASM_EXTABLE_UA(100b, 103b)
 	_ASM_EXTABLE_UA(101b, 103b)
 	.endm
-#endif /* __ASSEMBLY__ */
+
+#else
+# define _EXPAND_EXTABLE_HANDLE(x) #x
+# define _ASM_EXTABLE_HANDLE(from, to, handler)			\
+	" .pushsection \"__ex_table\",\"a\"\n"			\
+	" .balign 4\n"						\
+	" .long (" #from ") - .\n"				\
+	" .long (" #to ") - .\n"				\
+	" .long (" _EXPAND_EXTABLE_HANDLE(handler) ") - .\n"	\
+	" .popsection\n"
+
+# define _ASM_EXTABLE(from, to)					\
+	_ASM_EXTABLE_HANDLE(from, to, ex_handler_default)
+
+# define _ASM_EXTABLE_UA(from, to)				\
+	_ASM_EXTABLE_HANDLE(from, to, ex_handler_uaccess)
+
+# define _ASM_EXTABLE_FAULT(from, to)				\
+	_ASM_EXTABLE_HANDLE(from, to, ex_handler_fault)
+
+# define _ASM_EXTABLE_EX(from, to)				\
+	_ASM_EXTABLE_HANDLE(from, to, ex_handler_ext)
+
+# define _ASM_EXTABLE_REFCOUNT(from, to)			\
+	_ASM_EXTABLE_HANDLE(from, to, ex_handler_refcount)
+
+/* For C file, we already have NOKPROBE_SYMBOL macro */
+#endif
 
 #ifndef __ASSEMBLY__
 /*
diff --git a/arch/x86/include/asm/bug.h b/arch/x86/include/asm/bug.h
index 5090035e6d16..6804d6642767 100644
--- a/arch/x86/include/asm/bug.h
+++ b/arch/x86/include/asm/bug.h
@@ -4,8 +4,6 @@
 
 #include <linux/stringify.h>
 
-#ifndef __ASSEMBLY__
-
 /*
  * Despite that some emulators terminate on UD2, we use it for WARN().
  *
@@ -22,15 +20,53 @@
 
 #define LEN_UD2		2
 
+#ifdef CONFIG_GENERIC_BUG
+
+#ifdef CONFIG_X86_32
+# define __BUG_REL(val)	".long " __stringify(val)
+#else
+# define __BUG_REL(val)	".long " __stringify(val) " - 2b"
+#endif
+
+#ifdef CONFIG_DEBUG_BUGVERBOSE
+
+#define _BUG_FLAGS(ins, flags)						\
+do {									\
+	asm volatile("1:\t" ins "\n"					\
+		     ".pushsection __bug_table,\"aw\"\n"		\
+		     "2:\t" __BUG_REL(1b) "\t# bug_entry::bug_addr\n"	\
+		     "\t"  __BUG_REL(%c0) "\t# bug_entry::file\n"	\
+		     "\t.word %c1"        "\t# bug_entry::line\n"	\
+		     "\t.word %c2"        "\t# bug_entry::flags\n"	\
+		     "\t.org 2b+%c3\n"					\
+		     ".popsection"					\
+		     : : "i" (__FILE__), "i" (__LINE__),		\
+			 "i" (flags),					\
+			 "i" (sizeof(struct bug_entry)));		\
+} while (0)
+
+#else /* !CONFIG_DEBUG_BUGVERBOSE */
+
 #define _BUG_FLAGS(ins, flags)						\
 do {									\
-	asm volatile("ASM_BUG ins=\"" ins "\" file=%c0 line=%c1 "	\
-		     "flags=%c2 size=%c3"				\
-		     : : "i" (__FILE__), "i" (__LINE__),                \
-			 "i" (flags),                                   \
+	asm volatile("1:\t" ins "\n"					\
+		     ".pushsection __bug_table,\"aw\"\n"		\
+		     "2:\t" __BUG_REL(1b) "\t# bug_entry::bug_addr\n"	\
+		     "\t.word %c0"        "\t# bug_entry::flags\n"	\
+		     "\t.org 2b+%c1\n"					\
+		     ".popsection"					\
+		     : : "i" (flags),					\
 			 "i" (sizeof(struct bug_entry)));		\
 } while (0)
 
+#endif /* CONFIG_DEBUG_BUGVERBOSE */
+
+#else
+
+#define _BUG_FLAGS(ins, flags)  asm volatile(ins)
+
+#endif /* CONFIG_GENERIC_BUG */
+
 #define HAVE_ARCH_BUG
 #define BUG()							\
 do {								\
@@ -46,54 +82,4 @@ do {								\
 
 #include <asm-generic/bug.h>
 
-#else /* __ASSEMBLY__ */
-
-#ifdef CONFIG_GENERIC_BUG
-
-#ifdef CONFIG_X86_32
-.macro __BUG_REL val:req
-	.long \val
-.endm
-#else
-.macro __BUG_REL val:req
-	.long \val - 2b
-.endm
-#endif
-
-#ifdef CONFIG_DEBUG_BUGVERBOSE
-
-.macro ASM_BUG ins:req file:req line:req flags:req size:req
-1:	\ins
-	.pushsection __bug_table,"aw"
-2:	__BUG_REL val=1b	# bug_entry::bug_addr
-	__BUG_REL val=\file	# bug_entry::file
-	.word \line		# bug_entry::line
-	.word \flags		# bug_entry::flags
-	.org 2b+\size
-	.popsection
-.endm
-
-#else /* !CONFIG_DEBUG_BUGVERBOSE */
-
-.macro ASM_BUG ins:req file:req line:req flags:req size:req
-1:	\ins
-	.pushsection __bug_table,"aw"
-2:	__BUG_REL val=1b	# bug_entry::bug_addr
-	.word \flags		# bug_entry::flags
-	.org 2b+\size
-	.popsection
-.endm
-
-#endif /* CONFIG_DEBUG_BUGVERBOSE */
-
-#else /* CONFIG_GENERIC_BUG */
-
-.macro ASM_BUG ins:req file:req line:req flags:req size:req
-	\ins
-.endm
-
-#endif /* CONFIG_GENERIC_BUG */
-
-#endif /* __ASSEMBLY__ */
-
 #endif /* _ASM_X86_BUG_H */
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 7d442722ef24..aced6c9290d6 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -2,10 +2,10 @@
 #ifndef _ASM_X86_CPUFEATURE_H
 #define _ASM_X86_CPUFEATURE_H
 
-#ifdef __KERNEL__
-#ifndef __ASSEMBLY__
-
 #include <asm/processor.h>
+
+#if defined(__KERNEL__) && !defined(__ASSEMBLY__)
+
 #include <asm/asm.h>
 #include <linux/bitops.h>
 
@@ -161,10 +161,37 @@ extern void clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int bit);
  */
 static __always_inline __pure bool _static_cpu_has(u16 bit)
 {
-	asm_volatile_goto("STATIC_CPU_HAS bitnum=%[bitnum] "
-			  "cap_byte=\"%[cap_byte]\" "
-			  "feature=%P[feature] t_yes=%l[t_yes] "
-			  "t_no=%l[t_no] always=%P[always]"
+	asm_volatile_goto("1: jmp 6f\n"
+		 "2:\n"
+		 ".skip -(((5f-4f) - (2b-1b)) > 0) * "
+			 "((5f-4f) - (2b-1b)),0x90\n"
+		 "3:\n"
+		 ".section .altinstructions,\"a\"\n"
+		 " .long 1b - .\n"		/* src offset */
+		 " .long 4f - .\n"		/* repl offset */
+		 " .word %P[always]\n"		/* always replace */
+		 " .byte 3b - 1b\n"		/* src len */
+		 " .byte 5f - 4f\n"		/* repl len */
+		 " .byte 3b - 2b\n"		/* pad len */
+		 ".previous\n"
+		 ".section .altinstr_replacement,\"ax\"\n"
+		 "4: jmp %l[t_no]\n"
+		 "5:\n"
+		 ".previous\n"
+		 ".section .altinstructions,\"a\"\n"
+		 " .long 1b - .\n"		/* src offset */
+		 " .long 0\n"			/* no replacement */
+		 " .word %P[feature]\n"		/* feature bit */
+		 " .byte 3b - 1b\n"		/* src len */
+		 " .byte 0\n"			/* repl len */
+		 " .byte 0\n"			/* pad len */
+		 ".previous\n"
+		 ".section .altinstr_aux,\"ax\"\n"
+		 "6:\n"
+		 " testb %[bitnum],%[cap_byte]\n"
+		 " jnz %l[t_yes]\n"
+		 " jmp %l[t_no]\n"
+		 ".previous\n"
 		 : : [feature]  "i" (bit),
 		     [always]   "i" (X86_FEATURE_ALWAYS),
 		     [bitnum]   "i" (1 << (bit & 7)),
@@ -199,44 +226,5 @@ static __always_inline __pure bool _static_cpu_has(u16 bit)
 #define CPU_FEATURE_TYPEVAL		boot_cpu_data.x86_vendor, boot_cpu_data.x86, \
 					boot_cpu_data.x86_model
 
-#else /* __ASSEMBLY__ */
-
-.macro STATIC_CPU_HAS bitnum:req cap_byte:req feature:req t_yes:req t_no:req always:req
-1:
-	jmp 6f
-2:
-	.skip -(((5f-4f) - (2b-1b)) > 0) * ((5f-4f) - (2b-1b)),0x90
-3:
-	.section .altinstructions,"a"
-	.long 1b - .		/* src offset */
-	.long 4f - .		/* repl offset */
-	.word \always		/* always replace */
-	.byte 3b - 1b		/* src len */
-	.byte 5f - 4f		/* repl len */
-	.byte 3b - 2b		/* pad len */
-	.previous
-	.section .altinstr_replacement,"ax"
-4:
-	jmp \t_no
-5:
-	.previous
-	.section .altinstructions,"a"
-	.long 1b - .		/* src offset */
-	.long 0			/* no replacement */
-	.word \feature		/* feature bit */
-	.byte 3b - 1b		/* src len */
-	.byte 0			/* repl len */
-	.byte 0			/* pad len */
-	.previous
-	.section .altinstr_aux,"ax"
-6:
-	testb \bitnum,\cap_byte
-	jnz \t_yes
-	jmp \t_no
-	.previous
-.endm
-
-#endif /* __ASSEMBLY__ */
-
-#endif /* __KERNEL__ */
+#endif /* defined(__KERNEL__) && !defined(__ASSEMBLY__) */
 #endif /* _ASM_X86_CPUFEATURE_H */
diff --git a/arch/x86/include/asm/fsgsbase.h b/arch/x86/include/asm/fsgsbase.h
index eb377b6e9eed..bca4c743de77 100644
--- a/arch/x86/include/asm/fsgsbase.h
+++ b/arch/x86/include/asm/fsgsbase.h
@@ -16,8 +16,8 @@
  */
 extern unsigned long x86_fsbase_read_task(struct task_struct *task);
 extern unsigned long x86_gsbase_read_task(struct task_struct *task);
-extern int x86_fsbase_write_task(struct task_struct *task, unsigned long fsbase);
-extern int x86_gsbase_write_task(struct task_struct *task, unsigned long gsbase);
+extern void x86_fsbase_write_task(struct task_struct *task, unsigned long fsbase);
+extern void x86_gsbase_write_task(struct task_struct *task, unsigned long gsbase);
 
 /* Helper functions for reading/writing FS/GS base */
 
@@ -39,8 +39,15 @@ static inline unsigned long x86_gsbase_read_cpu_inactive(void)
 	return gsbase;
 }
 
-extern void x86_fsbase_write_cpu(unsigned long fsbase);
-extern void x86_gsbase_write_cpu_inactive(unsigned long gsbase);
+static inline void x86_fsbase_write_cpu(unsigned long fsbase)
+{
+	wrmsrl(MSR_FS_BASE, fsbase);
+}
+
+static inline void x86_gsbase_write_cpu_inactive(unsigned long gsbase)
+{
+	wrmsrl(MSR_KERNEL_GS_BASE, gsbase);
+}
 
 #endif /* CONFIG_X86_64 */
 
diff --git a/arch/x86/include/asm/jump_label.h b/arch/x86/include/asm/jump_label.h
index a5fb34fe56a4..21efc9d07ed9 100644
--- a/arch/x86/include/asm/jump_label.h
+++ b/arch/x86/include/asm/jump_label.h
@@ -2,6 +2,19 @@
 #ifndef _ASM_X86_JUMP_LABEL_H
 #define _ASM_X86_JUMP_LABEL_H
 
+#ifndef HAVE_JUMP_LABEL
+/*
+ * For better or for worse, if jump labels (the gcc extension) are missing,
+ * then the entire static branch patching infrastructure is compiled out.
+ * If that happens, the code in here will malfunction.  Raise a compiler
+ * error instead.
+ *
+ * In theory, jump labels and the static branch patching infrastructure
+ * could be decoupled to fix this.
+ */
+#error asm/jump_label.h included on a non-jump-label kernel
+#endif
+
 #define JUMP_LABEL_NOP_SIZE 5
 
 #ifdef CONFIG_X86_64
@@ -20,9 +33,15 @@
 
 static __always_inline bool arch_static_branch(struct static_key *key, bool branch)
 {
-	asm_volatile_goto("STATIC_BRANCH_NOP l_yes=\"%l[l_yes]\" key=\"%c0\" "
-			  "branch=\"%c1\""
-			: :  "i" (key), "i" (branch) : : l_yes);
+	asm_volatile_goto("1:"
+		".byte " __stringify(STATIC_KEY_INIT_NOP) "\n\t"
+		".pushsection __jump_table,  \"aw\" \n\t"
+		_ASM_ALIGN "\n\t"
+		".long 1b - ., %l[l_yes] - . \n\t"
+		_ASM_PTR "%c0 + %c1 - .\n\t"
+		".popsection \n\t"
+		: :  "i" (key), "i" (branch) : : l_yes);
+
 	return false;
 l_yes:
 	return true;
@@ -30,8 +49,14 @@ static __always_inline bool arch_static_branch(struct static_key *key, bool bran
 
 static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch)
 {
-	asm_volatile_goto("STATIC_BRANCH_JMP l_yes=\"%l[l_yes]\" key=\"%c0\" "
-			  "branch=\"%c1\""
+	asm_volatile_goto("1:"
+		".byte 0xe9\n\t .long %l[l_yes] - 2f\n\t"
+		"2:\n\t"
+		".pushsection __jump_table,  \"aw\" \n\t"
+		_ASM_ALIGN "\n\t"
+		".long 1b - ., %l[l_yes] - . \n\t"
+		_ASM_PTR "%c0 + %c1 - .\n\t"
+		".popsection \n\t"
 		: :  "i" (key), "i" (branch) : : l_yes);
 
 	return false;
@@ -41,26 +66,37 @@ static __always_inline bool arch_static_branch_jump(struct static_key *key, bool
 
 #else	/* __ASSEMBLY__ */
 
-.macro STATIC_BRANCH_NOP l_yes:req key:req branch:req
-.Lstatic_branch_nop_\@:
-	.byte STATIC_KEY_INIT_NOP
-.Lstatic_branch_no_after_\@:
+.macro STATIC_JUMP_IF_TRUE target, key, def
+.Lstatic_jump_\@:
+	.if \def
+	/* Equivalent to "jmp.d32 \target" */
+	.byte		0xe9
+	.long		\target - .Lstatic_jump_after_\@
+.Lstatic_jump_after_\@:
+	.else
+	.byte		STATIC_KEY_INIT_NOP
+	.endif
 	.pushsection __jump_table, "aw"
 	_ASM_ALIGN
-	.long		.Lstatic_branch_nop_\@ - ., \l_yes - .
-	_ASM_PTR        \key + \branch - .
+	.long		.Lstatic_jump_\@ - ., \target - .
+	_ASM_PTR	\key - .
 	.popsection
 .endm
 
-.macro STATIC_BRANCH_JMP l_yes:req key:req branch:req
-.Lstatic_branch_jmp_\@:
-	.byte 0xe9
-	.long \l_yes - .Lstatic_branch_jmp_after_\@
-.Lstatic_branch_jmp_after_\@:
+.macro STATIC_JUMP_IF_FALSE target, key, def
+.Lstatic_jump_\@:
+	.if \def
+	.byte		STATIC_KEY_INIT_NOP
+	.else
+	/* Equivalent to "jmp.d32 \target" */
+	.byte		0xe9
+	.long		\target - .Lstatic_jump_after_\@
+.Lstatic_jump_after_\@:
+	.endif
 	.pushsection __jump_table, "aw"
 	_ASM_ALIGN
-	.long		.Lstatic_branch_jmp_\@ - ., \l_yes - .
-	_ASM_PTR	\key + \branch - .
+	.long		.Lstatic_jump_\@ - ., \target - .
+	_ASM_PTR	\key + 1 - .
 	.popsection
 .endm
 
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index 26942ad63830..488c59686a73 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -348,11 +348,23 @@ extern struct paravirt_patch_template pv_ops;
 #define paravirt_clobber(clobber)		\
 	[paravirt_clobber] "i" (clobber)
 
+/*
+ * Generate some code, and mark it as patchable by the
+ * apply_paravirt() alternate instruction patcher.
+ */
+#define _paravirt_alt(insn_string, type, clobber)	\
+	"771:\n\t" insn_string "\n" "772:\n"		\
+	".pushsection .parainstructions,\"a\"\n"	\
+	_ASM_ALIGN "\n"					\
+	_ASM_PTR " 771b\n"				\
+	"  .byte " type "\n"				\
+	"  .byte 772b-771b\n"				\
+	"  .short " clobber "\n"			\
+	".popsection\n"
+
 /* Generate patchable code, with the default asm parameters. */
-#define paravirt_call							\
-	"PARAVIRT_CALL type=\"%c[paravirt_typenum]\""			\
-	" clobber=\"%c[paravirt_clobber]\""				\
-	" pv_opptr=\"%c[paravirt_opptr]\";"
+#define paravirt_alt(insn_string)					\
+	_paravirt_alt(insn_string, "%c[paravirt_typenum]", "%c[paravirt_clobber]")
 
 /* Simple instruction patching code. */
 #define NATIVE_LABEL(a,x,b) "\n\t.globl " a #x "_" #b "\n" a #x "_" #b ":\n\t"
@@ -372,6 +384,16 @@ unsigned native_patch(u8 type, void *ibuf, unsigned long addr, unsigned len);
 
 int paravirt_disable_iospace(void);
 
+/*
+ * This generates an indirect call based on the operation type number.
+ * The type number, computed in PARAVIRT_PATCH, is derived from the
+ * offset into the paravirt_patch_template structure, and can therefore be
+ * freely converted back into a structure offset.
+ */
+#define PARAVIRT_CALL					\
+	ANNOTATE_RETPOLINE_SAFE				\
+	"call *%c[paravirt_opptr];"
+
 /*
  * These macros are intended to wrap calls through one of the paravirt
  * ops structs, so that they can be later identified and patched at
@@ -509,7 +531,7 @@ int paravirt_disable_iospace(void);
 		/* since this condition will never hold */		\
 		if (sizeof(rettype) > sizeof(unsigned long)) {		\
 			asm volatile(pre				\
-				     paravirt_call			\
+				     paravirt_alt(PARAVIRT_CALL)	\
 				     post				\
 				     : call_clbr, ASM_CALL_CONSTRAINT	\
 				     : paravirt_type(op),		\
@@ -519,7 +541,7 @@ int paravirt_disable_iospace(void);
 			__ret = (rettype)((((u64)__edx) << 32) | __eax); \
 		} else {						\
 			asm volatile(pre				\
-				     paravirt_call			\
+				     paravirt_alt(PARAVIRT_CALL)	\
 				     post				\
 				     : call_clbr, ASM_CALL_CONSTRAINT	\
 				     : paravirt_type(op),		\
@@ -546,7 +568,7 @@ int paravirt_disable_iospace(void);
 		PVOP_VCALL_ARGS;					\
 		PVOP_TEST_NULL(op);					\
 		asm volatile(pre					\
-			     paravirt_call				\
+			     paravirt_alt(PARAVIRT_CALL)		\
 			     post					\
 			     : call_clbr, ASM_CALL_CONSTRAINT		\
 			     : paravirt_type(op),			\
@@ -664,26 +686,6 @@ struct paravirt_patch_site {
 extern struct paravirt_patch_site __parainstructions[],
 	__parainstructions_end[];
 
-#else	/* __ASSEMBLY__ */
-
-/*
- * This generates an indirect call based on the operation type number.
- * The type number, computed in PARAVIRT_PATCH, is derived from the
- * offset into the paravirt_patch_template structure, and can therefore be
- * freely converted back into a structure offset.
- */
-.macro PARAVIRT_CALL type:req clobber:req pv_opptr:req
-771:	ANNOTATE_RETPOLINE_SAFE
-	call *\pv_opptr
-772:	.pushsection .parainstructions,"a"
-	_ASM_ALIGN
-	_ASM_PTR 771b
-	.byte \type
-	.byte 772b-771b
-	.short \clobber
-	.popsection
-.endm
-
 #endif	/* __ASSEMBLY__ */
 
 #endif	/* _ASM_X86_PARAVIRT_TYPES_H */
diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h
index 84bd9bdc1987..88bca456da99 100644
--- a/arch/x86/include/asm/pgtable_64_types.h
+++ b/arch/x86/include/asm/pgtable_64_types.h
@@ -111,6 +111,11 @@ extern unsigned int ptrs_per_p4d;
  */
 #define MAXMEM			(1UL << MAX_PHYSMEM_BITS)
 
+#define GUARD_HOLE_PGD_ENTRY	-256UL
+#define GUARD_HOLE_SIZE		(16UL << PGDIR_SHIFT)
+#define GUARD_HOLE_BASE_ADDR	(GUARD_HOLE_PGD_ENTRY << PGDIR_SHIFT)
+#define GUARD_HOLE_END_ADDR	(GUARD_HOLE_BASE_ADDR + GUARD_HOLE_SIZE)
+
 #define LDT_PGD_ENTRY		-240UL
 #define LDT_BASE_ADDR		(LDT_PGD_ENTRY << PGDIR_SHIFT)
 #define LDT_END_ADDR		(LDT_BASE_ADDR + PGDIR_SIZE)
diff --git a/arch/x86/include/asm/refcount.h b/arch/x86/include/asm/refcount.h
index a8b5e1e13319..dbaed55c1c24 100644
--- a/arch/x86/include/asm/refcount.h
+++ b/arch/x86/include/asm/refcount.h
@@ -4,41 +4,6 @@
  * x86-specific implementation of refcount_t. Based on PAX_REFCOUNT from
  * PaX/grsecurity.
  */
-
-#ifdef __ASSEMBLY__
-
-#include <asm/asm.h>
-#include <asm/bug.h>
-
-.macro REFCOUNT_EXCEPTION counter:req
-	.pushsection .text..refcount
-111:	lea \counter, %_ASM_CX
-112:	ud2
-	ASM_UNREACHABLE
-	.popsection
-113:	_ASM_EXTABLE_REFCOUNT(112b, 113b)
-.endm
-
-/* Trigger refcount exception if refcount result is negative. */
-.macro REFCOUNT_CHECK_LT_ZERO counter:req
-	js 111f
-	REFCOUNT_EXCEPTION counter="\counter"
-.endm
-
-/* Trigger refcount exception if refcount result is zero or negative. */
-.macro REFCOUNT_CHECK_LE_ZERO counter:req
-	jz 111f
-	REFCOUNT_CHECK_LT_ZERO counter="\counter"
-.endm
-
-/* Trigger refcount exception unconditionally. */
-.macro REFCOUNT_ERROR counter:req
-	jmp 111f
-	REFCOUNT_EXCEPTION counter="\counter"
-.endm
-
-#else /* __ASSEMBLY__ */
-
 #include <linux/refcount.h>
 #include <asm/bug.h>
 
@@ -50,12 +15,35 @@
  * central refcount exception. The fixup address for the exception points
  * back to the regular execution flow in .text.
  */
+#define _REFCOUNT_EXCEPTION				\
+	".pushsection .text..refcount\n"		\
+	"111:\tlea %[var], %%" _ASM_CX "\n"		\
+	"112:\t" ASM_UD2 "\n"				\
+	ASM_UNREACHABLE					\
+	".popsection\n"					\
+	"113:\n"					\
+	_ASM_EXTABLE_REFCOUNT(112b, 113b)
+
+/* Trigger refcount exception if refcount result is negative. */
+#define REFCOUNT_CHECK_LT_ZERO				\
+	"js 111f\n\t"					\
+	_REFCOUNT_EXCEPTION
+
+/* Trigger refcount exception if refcount result is zero or negative. */
+#define REFCOUNT_CHECK_LE_ZERO				\
+	"jz 111f\n\t"					\
+	REFCOUNT_CHECK_LT_ZERO
+
+/* Trigger refcount exception unconditionally. */
+#define REFCOUNT_ERROR					\
+	"jmp 111f\n\t"					\
+	_REFCOUNT_EXCEPTION
 
 static __always_inline void refcount_add(unsigned int i, refcount_t *r)
 {
 	asm volatile(LOCK_PREFIX "addl %1,%0\n\t"
-		"REFCOUNT_CHECK_LT_ZERO counter=\"%[counter]\""
-		: [counter] "+m" (r->refs.counter)
+		REFCOUNT_CHECK_LT_ZERO
+		: [var] "+m" (r->refs.counter)
 		: "ir" (i)
 		: "cc", "cx");
 }
@@ -63,32 +51,31 @@ static __always_inline void refcount_add(unsigned int i, refcount_t *r)
 static __always_inline void refcount_inc(refcount_t *r)
 {
 	asm volatile(LOCK_PREFIX "incl %0\n\t"
-		"REFCOUNT_CHECK_LT_ZERO counter=\"%[counter]\""
-		: [counter] "+m" (r->refs.counter)
+		REFCOUNT_CHECK_LT_ZERO
+		: [var] "+m" (r->refs.counter)
 		: : "cc", "cx");
 }
 
 static __always_inline void refcount_dec(refcount_t *r)
 {
 	asm volatile(LOCK_PREFIX "decl %0\n\t"
-		"REFCOUNT_CHECK_LE_ZERO counter=\"%[counter]\""
-		: [counter] "+m" (r->refs.counter)
+		REFCOUNT_CHECK_LE_ZERO
+		: [var] "+m" (r->refs.counter)
 		: : "cc", "cx");
 }
 
 static __always_inline __must_check
 bool refcount_sub_and_test(unsigned int i, refcount_t *r)
 {
-
 	return GEN_BINARY_SUFFIXED_RMWcc(LOCK_PREFIX "subl",
-					 "REFCOUNT_CHECK_LT_ZERO counter=\"%[var]\"",
+					 REFCOUNT_CHECK_LT_ZERO,
 					 r->refs.counter, e, "er", i, "cx");
 }
 
 static __always_inline __must_check bool refcount_dec_and_test(refcount_t *r)
 {
 	return GEN_UNARY_SUFFIXED_RMWcc(LOCK_PREFIX "decl",
-					"REFCOUNT_CHECK_LT_ZERO counter=\"%[var]\"",
+					REFCOUNT_CHECK_LT_ZERO,
 					r->refs.counter, e, "cx");
 }
 
@@ -106,8 +93,8 @@ bool refcount_add_not_zero(unsigned int i, refcount_t *r)
 
 		/* Did we try to increment from/to an undesirable state? */
 		if (unlikely(c < 0 || c == INT_MAX || result < c)) {
-			asm volatile("REFCOUNT_ERROR counter=\"%[counter]\""
-				     : : [counter] "m" (r->refs.counter)
+			asm volatile(REFCOUNT_ERROR
+				     : : [var] "m" (r->refs.counter)
 				     : "cc", "cx");
 			break;
 		}
@@ -122,6 +109,4 @@ static __always_inline __must_check bool refcount_inc_not_zero(refcount_t *r)
 	return refcount_add_not_zero(1, r);
 }
 
-#endif /* __ASSEMBLY__ */
-
 #endif
diff --git a/arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c b/arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c
index 27937458c231..efa4a519f5e5 100644
--- a/arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c
+++ b/arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c
@@ -23,6 +23,7 @@
 
 #define pr_fmt(fmt)	KBUILD_MODNAME ": " fmt
 
+#include <linux/cpu.h>
 #include <linux/kernfs.h>
 #include <linux/seq_file.h>
 #include <linux/slab.h>
@@ -310,9 +311,11 @@ ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of,
 		return -EINVAL;
 	buf[nbytes - 1] = '\0';
 
+	cpus_read_lock();
 	rdtgrp = rdtgroup_kn_lock_live(of->kn);
 	if (!rdtgrp) {
 		rdtgroup_kn_unlock(of->kn);
+		cpus_read_unlock();
 		return -ENOENT;
 	}
 	rdt_last_cmd_clear();
@@ -367,6 +370,7 @@ ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of,
 
 out:
 	rdtgroup_kn_unlock(of->kn);
+	cpus_read_unlock();
 	return ret ?: nbytes;
 }
 
diff --git a/arch/x86/kernel/cpu/mtrr/if.c b/arch/x86/kernel/cpu/mtrr/if.c
index 2e173d47b450..4d36dcc1cf87 100644
--- a/arch/x86/kernel/cpu/mtrr/if.c
+++ b/arch/x86/kernel/cpu/mtrr/if.c
@@ -165,6 +165,8 @@ mtrr_ioctl(struct file *file, unsigned int cmd, unsigned long __arg)
 	struct mtrr_gentry gentry;
 	void __user *arg = (void __user *) __arg;
 
+	memset(&gentry, 0, sizeof(gentry));
+
 	switch (cmd) {
 	case MTRRIOC_ADD_ENTRY:
 	case MTRRIOC_SET_ENTRY:
diff --git a/arch/x86/kernel/macros.S b/arch/x86/kernel/macros.S
deleted file mode 100644
index 161c95059044..000000000000
--- a/arch/x86/kernel/macros.S
+++ /dev/null
@@ -1,16 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-
-/*
- * This file includes headers whose assembly part includes macros which are
- * commonly used. The macros are precompiled into assmebly file which is later
- * assembled together with each compiled file.
- */
-
-#include <linux/compiler.h>
-#include <asm/refcount.h>
-#include <asm/alternative-asm.h>
-#include <asm/bug.h>
-#include <asm/paravirt.h>
-#include <asm/asm.h>
-#include <asm/cpufeature.h>
-#include <asm/jump_label.h>
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index bbfbf017065c..ddd4fa718c43 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -339,24 +339,6 @@ static unsigned long x86_fsgsbase_read_task(struct task_struct *task,
 	return base;
 }
 
-void x86_fsbase_write_cpu(unsigned long fsbase)
-{
-	/*
-	 * Set the selector to 0 as a notion, that the segment base is
-	 * overwritten, which will be checked for skipping the segment load
-	 * during context switch.
-	 */
-	loadseg(FS, 0);
-	wrmsrl(MSR_FS_BASE, fsbase);
-}
-
-void x86_gsbase_write_cpu_inactive(unsigned long gsbase)
-{
-	/* Set the selector to 0 for the same reason as %fs above. */
-	loadseg(GS, 0);
-	wrmsrl(MSR_KERNEL_GS_BASE, gsbase);
-}
-
 unsigned long x86_fsbase_read_task(struct task_struct *task)
 {
 	unsigned long fsbase;
@@ -385,38 +367,18 @@ unsigned long x86_gsbase_read_task(struct task_struct *task)
 	return gsbase;
 }
 
-int x86_fsbase_write_task(struct task_struct *task, unsigned long fsbase)
+void x86_fsbase_write_task(struct task_struct *task, unsigned long fsbase)
 {
-	/*
-	 * Not strictly needed for %fs, but do it for symmetry
-	 * with %gs
-	 */
-	if (unlikely(fsbase >= TASK_SIZE_MAX))
-		return -EPERM;
+	WARN_ON_ONCE(task == current);
 
-	preempt_disable();
 	task->thread.fsbase = fsbase;
-	if (task == current)
-		x86_fsbase_write_cpu(fsbase);
-	task->thread.fsindex = 0;
-	preempt_enable();
-
-	return 0;
 }
 
-int x86_gsbase_write_task(struct task_struct *task, unsigned long gsbase)
+void x86_gsbase_write_task(struct task_struct *task, unsigned long gsbase)
 {
-	if (unlikely(gsbase >= TASK_SIZE_MAX))
-		return -EPERM;
+	WARN_ON_ONCE(task == current);
 
-	preempt_disable();
 	task->thread.gsbase = gsbase;
-	if (task == current)
-		x86_gsbase_write_cpu_inactive(gsbase);
-	task->thread.gsindex = 0;
-	preempt_enable();
-
-	return 0;
 }
 
 int copy_thread_tls(unsigned long clone_flags, unsigned long sp,
@@ -754,11 +716,60 @@ long do_arch_prctl_64(struct task_struct *task, int option, unsigned long arg2)
 
 	switch (option) {
 	case ARCH_SET_GS: {
-		ret = x86_gsbase_write_task(task, arg2);
+		if (unlikely(arg2 >= TASK_SIZE_MAX))
+			return -EPERM;
+
+		preempt_disable();
+		/*
+		 * ARCH_SET_GS has always overwritten the index
+		 * and the base. Zero is the most sensible value
+		 * to put in the index, and is the only value that
+		 * makes any sense if FSGSBASE is unavailable.
+		 */
+		if (task == current) {
+			loadseg(GS, 0);
+			x86_gsbase_write_cpu_inactive(arg2);
+
+			/*
+			 * On non-FSGSBASE systems, save_base_legacy() expects
+			 * that we also fill in thread.gsbase.
+			 */
+			task->thread.gsbase = arg2;
+
+		} else {
+			task->thread.gsindex = 0;
+			x86_gsbase_write_task(task, arg2);
+		}
+		preempt_enable();
 		break;
 	}
 	case ARCH_SET_FS: {
-		ret = x86_fsbase_write_task(task, arg2);
+		/*
+		 * Not strictly needed for %fs, but do it for symmetry
+		 * with %gs
+		 */
+		if (unlikely(arg2 >= TASK_SIZE_MAX))
+			return -EPERM;
+
+		preempt_disable();
+		/*
+		 * Set the selector to 0 for the same reason
+		 * as %gs above.
+		 */
+		if (task == current) {
+			loadseg(FS, 0);
+			x86_fsbase_write_cpu(arg2);
+
+			/*
+			 * On non-FSGSBASE systems, save_base_legacy() expects
+			 * that we also fill in thread.fsbase.
+			 */
+			task->thread.fsbase = arg2;
+		} else {
+			task->thread.fsindex = 0;
+			x86_fsbase_write_task(task, arg2);
+		}
+		preempt_enable();
 		break;
 	}
 	case ARCH_GET_FS: {
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index ffae9b9740fd..4b8ee05dd6ad 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -397,11 +397,12 @@ static int putreg(struct task_struct *child,
 		if (value >= TASK_SIZE_MAX)
 			return -EIO;
 		/*
-		 * When changing the FS base, use the same
-		 * mechanism as for do_arch_prctl_64().
+		 * When changing the FS base, use do_arch_prctl_64()
+		 * to set the index to zero and to set the base
+		 * as requested.
 		 */
 		if (child->thread.fsbase != value)
-			return x86_fsbase_write_task(child, value);
+			return do_arch_prctl_64(child, ARCH_SET_FS, value);
 		return 0;
 	case offsetof(struct user_regs_struct,gs_base):
 		/*
@@ -410,7 +411,7 @@ static int putreg(struct task_struct *child,
 		if (value >= TASK_SIZE_MAX)
 			return -EIO;
 		if (child->thread.gsbase != value)
-			return x86_gsbase_write_task(child, value);
+			return do_arch_prctl_64(child, ARCH_SET_GS, value);
 		return 0;
 #endif
 	}
diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c
index fc37bbd23eb8..abcb8d00b014 100644
--- a/arch/x86/mm/dump_pagetables.c
+++ b/arch/x86/mm/dump_pagetables.c
@@ -55,10 +55,10 @@ struct addr_marker {
 enum address_markers_idx {
 	USER_SPACE_NR = 0,
 	KERNEL_SPACE_NR,
-	LOW_KERNEL_NR,
-#if defined(CONFIG_MODIFY_LDT_SYSCALL) && defined(CONFIG_X86_5LEVEL)
+#ifdef CONFIG_MODIFY_LDT_SYSCALL
 	LDT_NR,
 #endif
+	LOW_KERNEL_NR,
 	VMALLOC_START_NR,
 	VMEMMAP_START_NR,
 #ifdef CONFIG_KASAN
@@ -66,9 +66,6 @@ enum address_markers_idx {
 	KASAN_SHADOW_END_NR,
 #endif
 	CPU_ENTRY_AREA_NR,
-#if defined(CONFIG_MODIFY_LDT_SYSCALL) && !defined(CONFIG_X86_5LEVEL)
-	LDT_NR,
-#endif
 #ifdef CONFIG_X86_ESPFIX64
 	ESPFIX_START_NR,
 #endif
@@ -512,11 +509,11 @@ static inline bool is_hypervisor_range(int idx)
 {
 #ifdef CONFIG_X86_64
 	/*
-	 * ffff800000000000 - ffff87ffffffffff is reserved for
-	 * the hypervisor.
+	 * A hole in the beginning of kernel address space reserved
+	 * for a hypervisor.
 	 */
-	return	(idx >= pgd_index(__PAGE_OFFSET) - 16) &&
-		(idx <  pgd_index(__PAGE_OFFSET));
+	return	(idx >= pgd_index(GUARD_HOLE_BASE_ADDR)) &&
+		(idx <  pgd_index(GUARD_HOLE_END_ADDR));
 #else
 	return false;
 #endif
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index db7a10082238..a1bcde35db4c 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -285,20 +285,16 @@ static void cpa_flush_all(unsigned long cache)
 	on_each_cpu(__cpa_flush_all, (void *) cache, 1);
 }
 
-static bool __cpa_flush_range(unsigned long start, int numpages, int cache)
+static bool __inv_flush_all(int cache)
 {
 	BUG_ON(irqs_disabled() && !early_boot_irqs_disabled);
 
-	WARN_ON(PAGE_ALIGN(start) != start);
-
 	if (cache && !static_cpu_has(X86_FEATURE_CLFLUSH)) {
 		cpa_flush_all(cache);
 		return true;
 	}
 
-	flush_tlb_kernel_range(start, start + PAGE_SIZE * numpages);
-
-	return !cache;
+	return false;
 }
 
 static void cpa_flush_range(unsigned long start, int numpages, int cache)
@@ -306,7 +302,14 @@ static void cpa_flush_range(unsigned long start, int numpages, int cache)
 	unsigned int i, level;
 	unsigned long addr;
 
-	if (__cpa_flush_range(start, numpages, cache))
+	WARN_ON(PAGE_ALIGN(start) != start);
+
+	if (__inv_flush_all(cache))
+		return;
+
+	flush_tlb_kernel_range(start, start + PAGE_SIZE * numpages);
+
+	if (!cache)
 		return;
 
 	/*
@@ -332,7 +335,12 @@ static void cpa_flush_array(unsigned long baddr, unsigned long *start,
 {
 	unsigned int i, level;
 
-	if (__cpa_flush_range(baddr, numpages, cache))
+	if (__inv_flush_all(cache))
+		return;
+
+	flush_tlb_all();
+
+	if (!cache)
 		return;
 
 	/*
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index 08013524fba1..4fe956a63b25 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -519,8 +519,13 @@ static u64 sanitize_phys(u64 address)
 	 * for a "decoy" virtual address (bit 63 clear) passed to
 	 * set_memory_X(). __pa() on a "decoy" address results in a
 	 * physical address with bit 63 set.
+	 *
+	 * Decoy addresses are not present for 32-bit builds, see
+	 * set_mce_nospec().
 	 */
-	return address & __PHYSICAL_MASK;
+	if (IS_ENABLED(CONFIG_X86_64))
+		return address & __PHYSICAL_MASK;
+	return address;
 }
 
 /*
@@ -546,7 +551,11 @@ int reserve_memtype(u64 start, u64 end, enum page_cache_mode req_type,
 
 	start = sanitize_phys(start);
 	end = sanitize_phys(end);
-	BUG_ON(start >= end); /* end is exclusive */
+	if (start >= end) {
+		WARN(1, "%s failed: [mem %#010Lx-%#010Lx], req %s\n", __func__,
+				start, end - 1, cattr_name(req_type));
+		return -EINVAL;
+	}
 
 	if (!pat_enabled()) {
 		/* This is identical to page table setting without PAT */
diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c
index a5d7ed125337..0f4fe206dcc2 100644
--- a/arch/x86/xen/mmu_pv.c
+++ b/arch/x86/xen/mmu_pv.c
@@ -648,19 +648,20 @@ static int __xen_pgd_walk(struct mm_struct *mm, pgd_t *pgd,
 			  unsigned long limit)
 {
 	int i, nr, flush = 0;
-	unsigned hole_low, hole_high;
+	unsigned hole_low = 0, hole_high = 0;
 
 	/* The limit is the last byte to be touched */
 	limit--;
 	BUG_ON(limit >= FIXADDR_TOP);
 
+#ifdef CONFIG_X86_64
 	/*
 	 * 64-bit has a great big hole in the middle of the address
-	 * space, which contains the Xen mappings.  On 32-bit these
-	 * will end up making a zero-sized hole and so is a no-op.
+	 * space, which contains the Xen mappings.
 	 */
-	hole_low = pgd_index(USER_LIMIT);
-	hole_high = pgd_index(PAGE_OFFSET);
+	hole_low = pgd_index(GUARD_HOLE_BASE_ADDR);
+	hole_high = pgd_index(GUARD_HOLE_END_ADDR);
+#endif
 
 	nr = pgd_index(limit) + 1;
 	for (i = 0; i < nr; i++) {
diff --git a/include/asm-generic/bug.h b/include/asm-generic/bug.h
index cdafa5edea49..20561a60db9c 100644
--- a/include/asm-generic/bug.h
+++ b/include/asm-generic/bug.h
@@ -17,8 +17,10 @@
 #ifndef __ASSEMBLY__
 #include <linux/kernel.h>
 
-struct bug_entry {
+#ifdef CONFIG_BUG
+
 #ifdef CONFIG_GENERIC_BUG
+struct bug_entry {
 #ifndef CONFIG_GENERIC_BUG_RELATIVE_POINTERS
 	unsigned long	bug_addr;
 #else
@@ -33,10 +35,8 @@ struct bug_entry {
 	unsigned short	line;
 #endif
 	unsigned short	flags;
-#endif	/* CONFIG_GENERIC_BUG */
 };
-
-#ifdef CONFIG_BUG
+#endif	/* CONFIG_GENERIC_BUG */
 
 /*
  * Don't use BUG() or BUG_ON() unless there's really no way out; one
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index 06396c1cf127..fc5004a4b07d 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -99,13 +99,22 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val,
  * unique, to convince GCC not to merge duplicate inline asm statements.
  */
 #define annotate_reachable() ({						\
-	asm volatile("ANNOTATE_REACHABLE counter=%c0"			\
-		     : : "i" (__COUNTER__));				\
+	asm volatile("%c0:\n\t"						\
+		     ".pushsection .discard.reachable\n\t"		\
+		     ".long %c0b - .\n\t"				\
+		     ".popsection\n\t" : : "i" (__COUNTER__));		\
 })
 #define annotate_unreachable() ({					\
-	asm volatile("ANNOTATE_UNREACHABLE counter=%c0"			\
-		     : : "i" (__COUNTER__));				\
+	asm volatile("%c0:\n\t"						\
+		     ".pushsection .discard.unreachable\n\t"		\
+		     ".long %c0b - .\n\t"				\
+		     ".popsection\n\t" : : "i" (__COUNTER__));		\
 })
+#define ASM_UNREACHABLE							\
+	"999:\n\t"							\
+	".pushsection .discard.unreachable\n\t"				\
+	".long 999b - .\n\t"						\
+	".popsection\n\t"
 #else
 #define annotate_reachable()
 #define annotate_unreachable()
@@ -293,45 +302,6 @@ static inline void *offset_to_ptr(const int *off)
 	return (void *)((unsigned long)off + *off);
 }
 
-#else /* __ASSEMBLY__ */
-
-#ifdef __KERNEL__
-#ifndef LINKER_SCRIPT
-
-#ifdef CONFIG_STACK_VALIDATION
-.macro ANNOTATE_UNREACHABLE counter:req
-\counter:
-	.pushsection .discard.unreachable
-	.long \counter\()b -.
-	.popsection
-.endm
-
-.macro ANNOTATE_REACHABLE counter:req
-\counter:
-	.pushsection .discard.reachable
-	.long \counter\()b -.
-	.popsection
-.endm
-
-.macro ASM_UNREACHABLE
-999:
-	.pushsection .discard.unreachable
-	.long 999b - .
-	.popsection
-.endm
-#else /* CONFIG_STACK_VALIDATION */
-.macro ANNOTATE_UNREACHABLE counter:req
-.endm
-
-.macro ANNOTATE_REACHABLE counter:req
-.endm
-
-.macro ASM_UNREACHABLE
-.endm
-#endif /* CONFIG_STACK_VALIDATION */
-
-#endif /* LINKER_SCRIPT */
-#endif /* __KERNEL__ */
 #endif /* __ASSEMBLY__ */
 
 /* Compile time object size, -1 for unknown */
diff --git a/scripts/Kbuild.include b/scripts/Kbuild.include
index bb015551c2d9..3d09844405c9 100644
--- a/scripts/Kbuild.include
+++ b/scripts/Kbuild.include
@@ -115,9 +115,7 @@ __cc-option = $(call try-run,\
 
 # Do not attempt to build with gcc plugins during cc-option tests.
 # (And this uses delayed resolution so the flags will be up to date.)
-# In addition, do not include the asm macros which are built later.
-CC_OPTION_FILTERED = $(GCC_PLUGINS_CFLAGS) $(ASM_MACRO_FLAGS)
-CC_OPTION_CFLAGS = $(filter-out $(CC_OPTION_FILTERED),$(KBUILD_CFLAGS))
+CC_OPTION_CFLAGS = $(filter-out $(GCC_PLUGINS_CFLAGS),$(KBUILD_CFLAGS))
 
 # cc-option
 # Usage: cflags-y += $(call cc-option,-march=winchip-c6,-march=i586)
diff --git a/scripts/mod/Makefile b/scripts/mod/Makefile
index a5b4af47987a..42c5d50f2bcc 100644
--- a/scripts/mod/Makefile
+++ b/scripts/mod/Makefile
@@ -4,8 +4,6 @@ OBJECT_FILES_NON_STANDARD := y
 hostprogs-y	:= modpost mk_elfconfig
 always		:= $(hostprogs-y) empty.o
 
-CFLAGS_REMOVE_empty.o := $(ASM_MACRO_FLAGS)
-
 modpost-objs	:= modpost.o file2alias.o sumversion.o
 
 devicetable-offsets-file := devicetable-offsets.h

^ permalink raw reply	[flat|nested] 539+ messages in thread

* Re: [GIT PULL] x86 fixes
  2018-12-09 22:06 Ingo Molnar
@ 2018-12-09 23:45 ` pr-tracker-bot
  0 siblings, 0 replies; 539+ messages in thread
From: pr-tracker-bot @ 2018-12-09 23:45 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Linus Torvalds, linux-kernel, Thomas Gleixner, Borislav Petkov,
	Peter Zijlstra, Andrew Morton

The pull request you sent on Sun, 9 Dec 2018 23:06:55 +0100:

> git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus

has been merged into torvalds/linux.git:
https://git.kernel.org/torvalds/c/8586ca8a214471e4573d76356aabe890bfecdc8a

Thank you!

-- 
Deet-doot-dot, I am a bot.
https://korg.wiki.kernel.org/userdoc/prtracker

^ permalink raw reply	[flat|nested] 539+ messages in thread

* [GIT PULL] x86 fixes
@ 2018-12-09 22:06 Ingo Molnar
  2018-12-09 23:45 ` pr-tracker-bot
  0 siblings, 1 reply; 539+ messages in thread
From: Ingo Molnar @ 2018-12-09 22:06 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Thomas Gleixner, Borislav Petkov, Peter Zijlstra,
	Andrew Morton

Linus,

Please pull the latest x86-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus

   # HEAD: ac3e233d29f7f77f28243af0132057d378d3ea58 x86/vdso: Drop implicit common-page-size linker flag

Three fixes: a boot parameter re-(re-)fix, a retpoline build artifact fix 
and an LLVM workaround.

 Thanks,

	Ingo

------------------>
Juergen Gross (1):
      x86/boot: Clear RSDP address in boot_params for broken loaders

Masahiro Yamada (1):
      x86/build: Fix compiler support check for CONFIG_RETPOLINE

Nick Desaulniers (1):
      x86/vdso: Drop implicit common-page-size linker flag


 arch/x86/Makefile                      | 10 +++++++---
 arch/x86/entry/vdso/Makefile           |  4 ++--
 arch/x86/include/asm/bootparam_utils.h |  1 +
 3 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index f5d7f4134524..75ef499a66e2 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -220,9 +220,6 @@ KBUILD_CFLAGS += -fno-asynchronous-unwind-tables
 
 # Avoid indirect branches in kernel to deal with Spectre
 ifdef CONFIG_RETPOLINE
-ifeq ($(RETPOLINE_CFLAGS),)
-  $(error You are building kernel with non-retpoline compiler, please update your compiler.)
-endif
   KBUILD_CFLAGS += $(RETPOLINE_CFLAGS)
 endif
 
@@ -307,6 +304,13 @@ ifndef CC_HAVE_ASM_GOTO
 	@echo Compiler lacks asm-goto support.
 	@exit 1
 endif
+ifdef CONFIG_RETPOLINE
+ifeq ($(RETPOLINE_CFLAGS),)
+	@echo "You are building kernel with non-retpoline compiler." >&2
+	@echo "Please update your compiler." >&2
+	@false
+endif
+endif
 
 archclean:
 	$(Q)rm -rf $(objtree)/arch/i386
diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile
index 141d415a8c80..0624bf2266fd 100644
--- a/arch/x86/entry/vdso/Makefile
+++ b/arch/x86/entry/vdso/Makefile
@@ -47,7 +47,7 @@ targets += $(vdso_img_sodbg) $(vdso_img-y:%=vdso%.so)
 CPPFLAGS_vdso.lds += -P -C
 
 VDSO_LDFLAGS_vdso.lds = -m elf_x86_64 -soname linux-vdso.so.1 --no-undefined \
-			-z max-page-size=4096 -z common-page-size=4096
+			-z max-page-size=4096
 
 $(obj)/vdso64.so.dbg: $(obj)/vdso.lds $(vobjs) FORCE
 	$(call if_changed,vdso)
@@ -98,7 +98,7 @@ CFLAGS_REMOVE_vvar.o = -pg
 
 CPPFLAGS_vdsox32.lds = $(CPPFLAGS_vdso.lds)
 VDSO_LDFLAGS_vdsox32.lds = -m elf32_x86_64 -soname linux-vdso.so.1 \
-			   -z max-page-size=4096 -z common-page-size=4096
+			   -z max-page-size=4096
 
 # x32-rebranded versions
 vobjx32s-y := $(vobjs-y:.o=-x32.o)
diff --git a/arch/x86/include/asm/bootparam_utils.h b/arch/x86/include/asm/bootparam_utils.h
index a07ffd23e4dd..f6f6ef436599 100644
--- a/arch/x86/include/asm/bootparam_utils.h
+++ b/arch/x86/include/asm/bootparam_utils.h
@@ -36,6 +36,7 @@ static void sanitize_boot_params(struct boot_params *boot_params)
 	 */
 	if (boot_params->sentinel) {
 		/* fields in boot_params are left uninitialized, clear them */
+		boot_params->acpi_rsdp_addr = 0;
 		memset(&boot_params->ext_ramdisk_image, 0,
 		       (char *)&boot_params->efi_info -
 			(char *)&boot_params->ext_ramdisk_image);

^ permalink raw reply	[flat|nested] 539+ messages in thread

* Re: [GIT PULL] x86 fixes
  2018-11-30  6:29 Ingo Molnar
@ 2018-11-30 21:00 ` pr-tracker-bot
  0 siblings, 0 replies; 539+ messages in thread
From: pr-tracker-bot @ 2018-11-30 21:00 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Linus Torvalds, linux-kernel, Thomas Gleixner, Borislav Petkov,
	Peter Zijlstra, Andrew Morton

The pull request you sent on Fri, 30 Nov 2018 07:29:06 +0100:

> git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus

has been merged into torvalds/linux.git:
https://git.kernel.org/torvalds/c/1ec63573b2db363848abb313cc75eb29e9abc1b3

Thank you!

-- 
Deet-doot-dot, I am a bot.
https://korg.wiki.kernel.org/userdoc/prtracker

^ permalink raw reply	[flat|nested] 539+ messages in thread

* [GIT PULL] x86 fixes
@ 2018-11-30  6:29 Ingo Molnar
  2018-11-30 21:00 ` pr-tracker-bot
  0 siblings, 1 reply; 539+ messages in thread
From: Ingo Molnar @ 2018-11-30  6:29 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Thomas Gleixner, Borislav Petkov, Peter Zijlstra,
	Andrew Morton

Linus,

Please pull the latest x86-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus

   # HEAD: 60c8144afc287ef09ce8c1230c6aa972659ba1bb x86/MCE/AMD: Fix the thresholding machinery initialization order

Misc fixes:

 - an MCE related boot crash fix on certain AMD systems
 - an FPU exception handling fix
 - an FPU handling race fix
 - a revert+rewrite of the RSDP boot protocol extension, use boot_params 
   instead
 - a documentation fix

 Thanks,

	Ingo

------------------>
Borislav Petkov (1):
      x86/MCE/AMD: Fix the thresholding machinery initialization order

Elvira Khabirova (1):
      x86/ptrace: Fix documentation for tracehook_report_syscall_entry()

Jann Horn (1):
      x86/fpu: Use the correct exception table macro in the XSTATE_OP wrapper

Juergen Gross (2):
      x86/boot: Mostly revert commit ae7e1238e68f2a ("Add ACPI RSDP address to setup_header")
      x86/acpi, x86/boot: Take RSDP address from boot params if available

Sebastian Andrzej Siewior (1):
      x86/fpu: Disable bottom halves while loading FPU registers


 Documentation/x86/boot.txt            | 32 +-------------------------------
 arch/x86/boot/header.S                |  6 +-----
 arch/x86/include/asm/fpu/internal.h   |  2 +-
 arch/x86/include/asm/x86_init.h       |  2 --
 arch/x86/include/uapi/asm/bootparam.h |  7 ++-----
 arch/x86/kernel/acpi/boot.c           |  2 +-
 arch/x86/kernel/cpu/mcheck/mce_amd.c  | 19 ++++++-------------
 arch/x86/kernel/fpu/signal.c          |  4 ++--
 arch/x86/kernel/head32.c              |  1 -
 arch/x86/kernel/head64.c              |  2 --
 arch/x86/kernel/setup.c               | 17 -----------------
 include/linux/tracehook.h             |  4 ++--
 12 files changed, 16 insertions(+), 82 deletions(-)

diff --git a/Documentation/x86/boot.txt b/Documentation/x86/boot.txt
index 7727db8f94bc..5e9b826b5f62 100644
--- a/Documentation/x86/boot.txt
+++ b/Documentation/x86/boot.txt
@@ -61,18 +61,6 @@ Protocol 2.12:	(Kernel 3.8) Added the xloadflags field and extension fields
 	 	to struct boot_params for loading bzImage and ramdisk
 		above 4G in 64bit.
 
-Protocol 2.13:	(Kernel 3.14) Support 32- and 64-bit flags being set in
-		xloadflags to support booting a 64-bit kernel from 32-bit
-		EFI
-
-Protocol 2.14:	(Kernel 4.20) Added acpi_rsdp_addr holding the physical
-		address of the ACPI RSDP table.
-		The bootloader updates version with:
-		0x8000 | min(kernel-version, bootloader-version)
-		kernel-version being the protocol version supported by
-		the kernel and bootloader-version the protocol version
-		supported by the bootloader.
-
 **** MEMORY LAYOUT
 
 The traditional memory map for the kernel loader, used for Image or
@@ -209,7 +197,6 @@ Offset	Proto	Name		Meaning
 0258/8	2.10+	pref_address	Preferred loading address
 0260/4	2.10+	init_size	Linear memory required during initialization
 0264/4	2.11+	handover_offset	Offset of handover entry point
-0268/8	2.14+	acpi_rsdp_addr	Physical address of RSDP table
 
 (1) For backwards compatibility, if the setup_sects field contains 0, the
     real value is 4.
@@ -322,7 +309,7 @@ Protocol:	2.00+
   Contains the magic number "HdrS" (0x53726448).
 
 Field name:	version
-Type:		modify
+Type:		read
 Offset/size:	0x206/2
 Protocol:	2.00+
 
@@ -330,12 +317,6 @@ Protocol:	2.00+
   e.g. 0x0204 for version 2.04, and 0x0a11 for a hypothetical version
   10.17.
 
-  Up to protocol version 2.13 this information is only read by the
-  bootloader. From protocol version 2.14 onwards the bootloader will
-  write the used protocol version or-ed with 0x8000 to the field. The
-  used protocol version will be the minimum of the supported protocol
-  versions of the bootloader and the kernel.
-
 Field name:	realmode_swtch
 Type:		modify (optional)
 Offset/size:	0x208/4
@@ -763,17 +744,6 @@ Offset/size:	0x264/4
 
   See EFI HANDOVER PROTOCOL below for more details.
 
-Field name:	acpi_rsdp_addr
-Type:		write
-Offset/size:	0x268/8
-Protocol:	2.14+
-
-  This field can be set by the boot loader to tell the kernel the
-  physical address of the ACPI RSDP table.
-
-  A value of 0 indicates the kernel should fall back to the standard
-  methods to locate the RSDP.
-
 
 **** THE IMAGE CHECKSUM
 
diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S
index 4c881c850125..850b8762e889 100644
--- a/arch/x86/boot/header.S
+++ b/arch/x86/boot/header.S
@@ -300,7 +300,7 @@ _start:
 	# Part 2 of the header, from the old setup.S
 
 		.ascii	"HdrS"		# header signature
-		.word	0x020e		# header version number (>= 0x0105)
+		.word	0x020d		# header version number (>= 0x0105)
 					# or else old loadlin-1.5 will fail)
 		.globl realmode_swtch
 realmode_swtch:	.word	0, 0		# default_switch, SETUPSEG
@@ -558,10 +558,6 @@ pref_address:		.quad LOAD_PHYSICAL_ADDR	# preferred load addr
 init_size:		.long INIT_SIZE		# kernel initialization size
 handover_offset:	.long 0			# Filled in by build.c
 
-acpi_rsdp_addr:		.quad 0			# 64-bit physical pointer to the
-						# ACPI RSDP table, added with
-						# version 2.14
-
 # End of setup header #####################################################
 
 	.section ".entrytext", "ax"
diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h
index 5f7290e6e954..69dcdf195b61 100644
--- a/arch/x86/include/asm/fpu/internal.h
+++ b/arch/x86/include/asm/fpu/internal.h
@@ -226,7 +226,7 @@ static inline void copy_fxregs_to_kernel(struct fpu *fpu)
 		     "3: movl $-2,%[err]\n\t"				\
 		     "jmp 2b\n\t"					\
 		     ".popsection\n\t"					\
-		     _ASM_EXTABLE_UA(1b, 3b)				\
+		     _ASM_EXTABLE(1b, 3b)				\
 		     : [err] "=r" (err)					\
 		     : "D" (st), "m" (*st), "a" (lmask), "d" (hmask)	\
 		     : "memory")
diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h
index 0f842104862c..b85a7c54c6a1 100644
--- a/arch/x86/include/asm/x86_init.h
+++ b/arch/x86/include/asm/x86_init.h
@@ -303,6 +303,4 @@ extern void x86_init_noop(void);
 extern void x86_init_uint_noop(unsigned int unused);
 extern bool x86_pnpbios_disabled(void);
 
-void x86_verify_bootdata_version(void);
-
 #endif
diff --git a/arch/x86/include/uapi/asm/bootparam.h b/arch/x86/include/uapi/asm/bootparam.h
index 22f89d040ddd..60733f137e9a 100644
--- a/arch/x86/include/uapi/asm/bootparam.h
+++ b/arch/x86/include/uapi/asm/bootparam.h
@@ -16,9 +16,6 @@
 #define RAMDISK_PROMPT_FLAG		0x8000
 #define RAMDISK_LOAD_FLAG		0x4000
 
-/* version flags */
-#define VERSION_WRITTEN	0x8000
-
 /* loadflags */
 #define LOADED_HIGH	(1<<0)
 #define KASLR_FLAG	(1<<1)
@@ -89,7 +86,6 @@ struct setup_header {
 	__u64	pref_address;
 	__u32	init_size;
 	__u32	handover_offset;
-	__u64	acpi_rsdp_addr;
 } __attribute__((packed));
 
 struct sys_desc_table {
@@ -159,7 +155,8 @@ struct boot_params {
 	__u8  _pad2[4];					/* 0x054 */
 	__u64  tboot_addr;				/* 0x058 */
 	struct ist_info ist_info;			/* 0x060 */
-	__u8  _pad3[16];				/* 0x070 */
+	__u64 acpi_rsdp_addr;				/* 0x070 */
+	__u8  _pad3[8];					/* 0x078 */
 	__u8  hd0_info[16];	/* obsolete! */		/* 0x080 */
 	__u8  hd1_info[16];	/* obsolete! */		/* 0x090 */
 	struct sys_desc_table sys_desc_table; /* obsolete! */	/* 0x0a0 */
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 92c76bf97ad8..06635fbca81c 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -1776,5 +1776,5 @@ void __init arch_reserve_mem_area(acpi_physical_address addr, size_t size)
 
 u64 x86_default_get_root_pointer(void)
 {
-	return boot_params.hdr.acpi_rsdp_addr;
+	return boot_params.acpi_rsdp_addr;
 }
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index dd33c357548f..e12454e21b8a 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -56,7 +56,7 @@
 /* Threshold LVT offset is at MSR0xC0000410[15:12] */
 #define SMCA_THR_LVT_OFF	0xF000
 
-static bool thresholding_en;
+static bool thresholding_irq_en;
 
 static const char * const th_names[] = {
 	"load_store",
@@ -534,9 +534,8 @@ prepare_threshold_block(unsigned int bank, unsigned int block, u32 addr,
 
 set_offset:
 	offset = setup_APIC_mce_threshold(offset, new);
-
-	if ((offset == new) && (mce_threshold_vector != amd_threshold_interrupt))
-		mce_threshold_vector = amd_threshold_interrupt;
+	if (offset == new)
+		thresholding_irq_en = true;
 
 done:
 	mce_threshold_block_init(&b, offset);
@@ -1357,9 +1356,6 @@ int mce_threshold_remove_device(unsigned int cpu)
 {
 	unsigned int bank;
 
-	if (!thresholding_en)
-		return 0;
-
 	for (bank = 0; bank < mca_cfg.banks; ++bank) {
 		if (!(per_cpu(bank_map, cpu) & (1 << bank)))
 			continue;
@@ -1377,9 +1373,6 @@ int mce_threshold_create_device(unsigned int cpu)
 	struct threshold_bank **bp;
 	int err = 0;
 
-	if (!thresholding_en)
-		return 0;
-
 	bp = per_cpu(threshold_banks, cpu);
 	if (bp)
 		return 0;
@@ -1408,9 +1401,6 @@ static __init int threshold_init_device(void)
 {
 	unsigned lcpu = 0;
 
-	if (mce_threshold_vector == amd_threshold_interrupt)
-		thresholding_en = true;
-
 	/* to hit CPUs online before the notifier is up */
 	for_each_online_cpu(lcpu) {
 		int err = mce_threshold_create_device(lcpu);
@@ -1419,6 +1409,9 @@ static __init int threshold_init_device(void)
 			return err;
 	}
 
+	if (thresholding_irq_en)
+		mce_threshold_vector = amd_threshold_interrupt;
+
 	return 0;
 }
 /*
diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c
index 61a949d84dfa..d99a8ee9e185 100644
--- a/arch/x86/kernel/fpu/signal.c
+++ b/arch/x86/kernel/fpu/signal.c
@@ -344,10 +344,10 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
 			sanitize_restored_xstate(tsk, &env, xfeatures, fx_only);
 		}
 
+		local_bh_disable();
 		fpu->initialized = 1;
-		preempt_disable();
 		fpu__restore(fpu);
-		preempt_enable();
+		local_bh_enable();
 
 		return err;
 	} else {
diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c
index 76fa3b836598..ec6fefbfd3c0 100644
--- a/arch/x86/kernel/head32.c
+++ b/arch/x86/kernel/head32.c
@@ -37,7 +37,6 @@ asmlinkage __visible void __init i386_start_kernel(void)
 	cr4_init_shadow();
 
 	sanitize_boot_params(&boot_params);
-	x86_verify_bootdata_version();
 
 	x86_early_init_platform_quirks();
 
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 7663a8eb602b..16b1cbd3a61e 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -457,8 +457,6 @@ void __init x86_64_start_reservations(char *real_mode_data)
 	if (!boot_params.hdr.version)
 		copy_bootdata(__va(real_mode_data));
 
-	x86_verify_bootdata_version();
-
 	x86_early_init_platform_quirks();
 
 	switch (boot_params.hdr.hardware_subarch) {
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index b74e7bfed6ab..d494b9bfe618 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -1280,23 +1280,6 @@ void __init setup_arch(char **cmdline_p)
 	unwind_init();
 }
 
-/*
- * From boot protocol 2.14 onwards we expect the bootloader to set the
- * version to "0x8000 | <used version>". In case we find a version >= 2.14
- * without the 0x8000 we assume the boot loader supports 2.13 only and
- * reset the version accordingly. The 0x8000 flag is removed in any case.
- */
-void __init x86_verify_bootdata_version(void)
-{
-	if (boot_params.hdr.version & VERSION_WRITTEN)
-		boot_params.hdr.version &= ~VERSION_WRITTEN;
-	else if (boot_params.hdr.version >= 0x020e)
-		boot_params.hdr.version = 0x020d;
-
-	if (boot_params.hdr.version < 0x020e)
-		boot_params.hdr.acpi_rsdp_addr = 0;
-}
-
 #ifdef CONFIG_X86_32
 
 static struct resource video_ram_resource = {
diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h
index 40b0b4c1bf7b..df20f8bdbfa3 100644
--- a/include/linux/tracehook.h
+++ b/include/linux/tracehook.h
@@ -83,8 +83,8 @@ static inline int ptrace_report_syscall(struct pt_regs *regs)
  * tracehook_report_syscall_entry - task is about to attempt a system call
  * @regs:		user register state of current task
  *
- * This will be called if %TIF_SYSCALL_TRACE has been set, when the
- * current task has just entered the kernel for a system call.
+ * This will be called if %TIF_SYSCALL_TRACE or %TIF_SYSCALL_EMU have been set,
+ * when the current task has just entered the kernel for a system call.
  * Full user register state is available here.  Changing the values
  * in @regs can affect the system call number and arguments to be tried.
  * It is safe to block here, preventing the system call from beginning.

^ permalink raw reply	[flat|nested] 539+ messages in thread

* Re: [GIT PULL] x86 fixes
  2018-11-03 23:09 Ingo Molnar
@ 2018-11-04  1:27 ` Linus Torvalds
  0 siblings, 0 replies; 539+ messages in thread
From: Linus Torvalds @ 2018-11-04  1:27 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Linux Kernel Mailing List, tglx, bp, Peter Zijlstra, Andrew Morton

On Sat, Nov 3, 2018 at 4:09 PM Ingo Molnar <mingo@kernel.org> wrote:
>
> A number of fixes and some late updates:

Pulled,

             Linus

^ permalink raw reply	[flat|nested] 539+ messages in thread

* [GIT PULL] x86 fixes
@ 2018-11-03 23:09 Ingo Molnar
  2018-11-04  1:27 ` Linus Torvalds
  0 siblings, 1 reply; 539+ messages in thread
From: Ingo Molnar @ 2018-11-03 23:09 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Thomas Gleixner, Borislav Petkov, Peter Zijlstra,
	Andrew Morton

Linus,

Please pull the latest x86-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus

   # HEAD: 23a12ddee1ce28065b71f14ccc695b5a0c8a64ff Merge branch 'core/urgent' into x86/urgent, to pick up objtool fix

A number of fixes and some late updates:

  - make in_compat_syscall() behavior on x86-32 similar to other 
    platforms, this touches a number of generic files but is not intended 
    to impact non-x86 platforms.

  - objtool fixes

  - PAT preemption fix

  - paravirt fixes/cleanups

  - cpufeatures updates for new instructions

  - earlyprintk quirk

  - make microcode version in sysfs world-readable (it is already world-readable in procfs) 

  - minor cleanups and fixes

  out-of-topic modifications in x86-urgent-for-linus:
  -----------------------------------------------------
  drivers/firmware/efi/efivars.c     # 98f76206b335: compat: Cleanup in_compat_sy
  include/linux/compat.h             # a846446b1914: x86/compat: Adjust in_compat
  kernel/time/time.c                 # 98f76206b335: compat: Cleanup in_compat_sy
  net/xfrm/xfrm_state.c              # 98f76206b335: compat: Cleanup in_compat_sy
  net/xfrm/xfrm_user.c               # 98f76206b335: compat: Cleanup in_compat_sy
  tools/objtool/check.c              # 4a60aa05a063: objtool: Support per-functio
  tools/objtool/check.h              # 4a60aa05a063: objtool: Support per-functio
  tools/objtool/elf.c                # bcb6fb5da77c: objtool: Support GCC 9 cold 
                                   # 4a60aa05a063: objtool: Support per-functio
  tools/objtool/elf.h                # 4a60aa05a063: objtool: Support per-functio

 Thanks,

	Ingo

------------------>
Allan Xavier (1):
      objtool: Support per-function rodata sections

Dave Jiang (1):
      x86/numa_emulation: Fix uniform-split numa emulation

Dmitry Safonov (2):
      x86/compat: Adjust in_compat_syscall() to generic code under !COMPAT
      compat: Cleanup in_compat_syscall() callers

Feng Tang (1):
      x86/earlyprintk: Add a force option for pciserial device

Fenghua Yu (2):
      x86/cpufeatures: Enumerate MOVDIRI instruction
      x86/cpufeatures: Enumerate MOVDIR64B instruction

Jacek Tomaka (1):
      x86/microcode: Make revision and processor flags world-readable

Jordan Borgner (1):
      x86: Clean up 'sizeof x' => 'sizeof(x)'

Josh Poimboeuf (1):
      objtool: Support GCC 9 cold subfunction naming scheme

Juergen Gross (2):
      x86/paravirt: Remove GPL from pv_ops export
      x86/paravirt: Remove unused _paravirt_ident_32

Rasmus Villemoes (1):
      x86/traps: Use format string with panic() call

Sebastian Andrzej Siewior (1):
      x86/mm/pat: Disable preemption around __flush_tlb_all()


 Documentation/admin-guide/kernel-parameters.txt |  6 +++-
 arch/x86/boot/cpucheck.c                        |  2 +-
 arch/x86/boot/early_serial_console.c            |  4 +--
 arch/x86/boot/edd.c                             |  6 ++--
 arch/x86/boot/main.c                            |  4 +--
 arch/x86/boot/memory.c                          |  2 +-
 arch/x86/boot/regs.c                            |  2 +-
 arch/x86/boot/video-vesa.c                      |  6 ++--
 arch/x86/boot/video.c                           |  2 +-
 arch/x86/events/intel/core.c                    |  2 +-
 arch/x86/include/asm/compat.h                   |  9 +++++-
 arch/x86/include/asm/cpufeatures.h              |  2 ++
 arch/x86/include/asm/ftrace.h                   |  4 +--
 arch/x86/include/asm/paravirt_types.h           |  2 --
 arch/x86/include/asm/tlbflush.h                 |  6 ++++
 arch/x86/kernel/cpu/common.c                    |  4 +--
 arch/x86/kernel/cpu/mcheck/mce.c                |  2 +-
 arch/x86/kernel/cpu/microcode/core.c            |  4 +--
 arch/x86/kernel/cpu/mtrr/generic.c              |  2 +-
 arch/x86/kernel/cpu/mtrr/if.c                   |  6 ++--
 arch/x86/kernel/early_printk.c                  | 29 +++++++++++------
 arch/x86/kernel/head64.c                        |  2 +-
 arch/x86/kernel/msr.c                           |  8 ++---
 arch/x86/kernel/paravirt.c                      | 28 +++++------------
 arch/x86/kernel/paravirt_patch_32.c             | 18 ++++-------
 arch/x86/kernel/paravirt_patch_64.c             | 20 ++++--------
 arch/x86/kernel/process_64.c                    |  4 +--
 arch/x86/kernel/sys_x86_64.c                    | 11 ++++---
 arch/x86/kernel/traps.c                         |  2 +-
 arch/x86/kvm/emulate.c                          | 22 ++++++-------
 arch/x86/kvm/lapic.c                            |  2 +-
 arch/x86/kvm/x86.c                              | 42 ++++++++++++-------------
 arch/x86/mm/hugetlbpage.c                       |  4 +--
 arch/x86/mm/mmap.c                              |  2 +-
 arch/x86/mm/numa_emulation.c                    | 12 +++++--
 arch/x86/mm/pageattr.c                          |  6 +++-
 arch/x86/tools/relocs.c                         |  4 +--
 arch/x86/um/asm/elf.h                           |  2 +-
 drivers/firmware/efi/efivars.c                  | 16 +++-------
 include/linux/compat.h                          |  4 +--
 kernel/time/time.c                              |  2 +-
 net/xfrm/xfrm_state.c                           |  2 --
 net/xfrm/xfrm_user.c                            |  2 --
 tools/objtool/check.c                           | 38 ++++++++++++++++++----
 tools/objtool/check.h                           |  4 +--
 tools/objtool/elf.c                             |  3 +-
 tools/objtool/elf.h                             |  3 +-
 47 files changed, 198 insertions(+), 171 deletions(-)


^ permalink raw reply	[flat|nested] 539+ messages in thread

* Re: [GIT PULL] x86 fixes
  2018-10-20  8:54 Ingo Molnar
@ 2018-10-20 13:28 ` Greg Kroah-Hartman
  0 siblings, 0 replies; 539+ messages in thread
From: Greg Kroah-Hartman @ 2018-10-20 13:28 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: linux-kernel, Linus Torvalds, Thomas Gleixner, Borislav Petkov,
	Peter Zijlstra, Andrew Morton

On Sat, Oct 20, 2018 at 10:54:25AM +0200, Ingo Molnar wrote:
> Greg,
> 
> Please pull the latest x86-urgent-for-linus git tree from:
> 
>    git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus

Now merged, thanks.

greg k-h

^ permalink raw reply	[flat|nested] 539+ messages in thread

* [GIT PULL] x86 fixes
@ 2018-10-20  8:54 Ingo Molnar
  2018-10-20 13:28 ` Greg Kroah-Hartman
  0 siblings, 1 reply; 539+ messages in thread
From: Ingo Molnar @ 2018-10-20  8:54 UTC (permalink / raw)
  To: Greg Kroah-Hartman
  Cc: linux-kernel, Linus Torvalds, Thomas Gleixner, Borislav Petkov,
	Peter Zijlstra, Andrew Morton

Greg,

Please pull the latest x86-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus

   # HEAD: 485734f3fc77c1eb77ffe138c027b9a4bf0178f3 x86/swiotlb: Enable swiotlb for > 4GiG RAM on 32-bit kernels

It's 4 misc fixes, 3 build warning fixes and 3 comment fixes.

In hindsight I'd have left out the 3 comment fixes to make the pull 
request look less scary at such a late point in the cycle. :-/

 Thanks,

	Ingo

------------------>
Andy Lutomirski (1):
      x86/entry/64: Further improve paranoid_entry comments

Christoph Hellwig (1):
      x86/swiotlb: Enable swiotlb for > 4GiG RAM on 32-bit kernels

Dave Hansen (1):
      x86/entry: Add some paranoid entry/exit CR3 handling comments

Jan Kiszka (1):
      x86/entry/32: Clear the CS high bits

Nathan Chancellor (2):
      x86/time: Correct the attribute on jiffies' definition
      x86/boot: Add -Wno-pointer-sign to KBUILD_CFLAGS

Peter Zijlstra (2):
      x86/tsc: Force inlining of cyc2ns bits
      x86/percpu: Fix this_cpu_read()

Sebastian Andrzej Siewior (2):
      x86/fpu: Remove second definition of fpu in __fpu__restore_sig()
      x86/fpu: Fix i486 + no387 boot crash by only saving FPU registers on context switch if there is an FPU


 arch/x86/boot/compressed/Makefile   |  1 +
 arch/x86/entry/entry_32.S           | 13 +++++++------
 arch/x86/entry/entry_64.S           | 13 +++++++++++++
 arch/x86/include/asm/fpu/internal.h |  2 +-
 arch/x86/include/asm/percpu.h       |  8 ++++----
 arch/x86/kernel/fpu/signal.c        |  1 -
 arch/x86/kernel/pci-swiotlb.c       |  2 --
 arch/x86/kernel/time.c              |  2 +-
 arch/x86/kernel/tsc.c               |  6 +++---
 9 files changed, 30 insertions(+), 18 deletions(-)

diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index 28764dacf018..466f66c8a7f8 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -37,6 +37,7 @@ KBUILD_CFLAGS += $(call cc-option,-ffreestanding)
 KBUILD_CFLAGS += $(call cc-option,-fno-stack-protector)
 KBUILD_CFLAGS += $(call cc-disable-warning, address-of-packed-member)
 KBUILD_CFLAGS += $(call cc-disable-warning, gnu)
+KBUILD_CFLAGS += -Wno-pointer-sign
 
 KBUILD_AFLAGS  := $(KBUILD_CFLAGS) -D__ASSEMBLY__
 GCOV_PROFILE := n
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index 2767c625a52c..fbbf1ba57ec6 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -389,6 +389,13 @@
 	 * that register for the time this macro runs
 	 */
 
+	/*
+	 * The high bits of the CS dword (__csh) are used for
+	 * CS_FROM_ENTRY_STACK and CS_FROM_USER_CR3. Clear them in case
+	 * hardware didn't do this for us.
+	 */
+	andl	$(0x0000ffff), PT_CS(%esp)
+
 	/* Are we on the entry stack? Bail out if not! */
 	movl	PER_CPU_VAR(cpu_entry_area), %ecx
 	addl	$CPU_ENTRY_AREA_entry_stack + SIZEOF_entry_stack, %ecx
@@ -407,12 +414,6 @@
 	/* Load top of task-stack into %edi */
 	movl	TSS_entry2task_stack(%edi), %edi
 
-	/*
-	 * Clear unused upper bits of the dword containing the word-sized CS
-	 * slot in pt_regs in case hardware didn't clear it for us.
-	 */
-	andl	$(0x0000ffff), PT_CS(%esp)
-
 	/* Special case - entry from kernel mode via entry stack */
 #ifdef CONFIG_VM86
 	movl	PT_EFLAGS(%esp), %ecx		# mix EFLAGS and CS
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 957dfb693ecc..f95dcb209fdf 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -1187,6 +1187,16 @@ ENTRY(paranoid_entry)
 	xorl	%ebx, %ebx
 
 1:
+	/*
+	 * Always stash CR3 in %r14.  This value will be restored,
+	 * verbatim, at exit.  Needed if paranoid_entry interrupted
+	 * another entry that already switched to the user CR3 value
+	 * but has not yet returned to userspace.
+	 *
+	 * This is also why CS (stashed in the "iret frame" by the
+	 * hardware at entry) can not be used: this may be a return
+	 * to kernel code, but with a user CR3 value.
+	 */
 	SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg=%rax save_reg=%r14
 
 	ret
@@ -1211,11 +1221,13 @@ ENTRY(paranoid_exit)
 	testl	%ebx, %ebx			/* swapgs needed? */
 	jnz	.Lparanoid_exit_no_swapgs
 	TRACE_IRQS_IRETQ
+	/* Always restore stashed CR3 value (see paranoid_entry) */
 	RESTORE_CR3	scratch_reg=%rbx save_reg=%r14
 	SWAPGS_UNSAFE_STACK
 	jmp	.Lparanoid_exit_restore
 .Lparanoid_exit_no_swapgs:
 	TRACE_IRQS_IRETQ_DEBUG
+	/* Always restore stashed CR3 value (see paranoid_entry) */
 	RESTORE_CR3	scratch_reg=%rbx save_reg=%r14
 .Lparanoid_exit_restore:
 	jmp restore_regs_and_return_to_kernel
@@ -1626,6 +1638,7 @@ end_repeat_nmi:
 	movq	$-1, %rsi
 	call	do_nmi
 
+	/* Always restore stashed CR3 value (see paranoid_entry) */
 	RESTORE_CR3 scratch_reg=%r15 save_reg=%r14
 
 	testl	%ebx, %ebx			/* swapgs needed? */
diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h
index a38bf5a1e37a..69dcdf195b61 100644
--- a/arch/x86/include/asm/fpu/internal.h
+++ b/arch/x86/include/asm/fpu/internal.h
@@ -528,7 +528,7 @@ static inline void fpregs_activate(struct fpu *fpu)
 static inline void
 switch_fpu_prepare(struct fpu *old_fpu, int cpu)
 {
-	if (old_fpu->initialized) {
+	if (static_cpu_has(X86_FEATURE_FPU) && old_fpu->initialized) {
 		if (!copy_fpregs_to_fpstate(old_fpu))
 			old_fpu->last_cpu = -1;
 		else
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index e9202a0de8f0..1a19d11cfbbd 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -185,22 +185,22 @@ do {									\
 	typeof(var) pfo_ret__;				\
 	switch (sizeof(var)) {				\
 	case 1:						\
-		asm(op "b "__percpu_arg(1)",%0"		\
+		asm volatile(op "b "__percpu_arg(1)",%0"\
 		    : "=q" (pfo_ret__)			\
 		    : "m" (var));			\
 		break;					\
 	case 2:						\
-		asm(op "w "__percpu_arg(1)",%0"		\
+		asm volatile(op "w "__percpu_arg(1)",%0"\
 		    : "=r" (pfo_ret__)			\
 		    : "m" (var));			\
 		break;					\
 	case 4:						\
-		asm(op "l "__percpu_arg(1)",%0"		\
+		asm volatile(op "l "__percpu_arg(1)",%0"\
 		    : "=r" (pfo_ret__)			\
 		    : "m" (var));			\
 		break;					\
 	case 8:						\
-		asm(op "q "__percpu_arg(1)",%0"		\
+		asm volatile(op "q "__percpu_arg(1)",%0"\
 		    : "=r" (pfo_ret__)			\
 		    : "m" (var));			\
 		break;					\
diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c
index 23f1691670b6..61a949d84dfa 100644
--- a/arch/x86/kernel/fpu/signal.c
+++ b/arch/x86/kernel/fpu/signal.c
@@ -314,7 +314,6 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
 		 * thread's fpu state, reconstruct fxstate from the fsave
 		 * header. Validate and sanitize the copied state.
 		 */
-		struct fpu *fpu = &tsk->thread.fpu;
 		struct user_i387_ia32_struct env;
 		int err = 0;
 
diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c
index 661583662430..71c0b01d93b1 100644
--- a/arch/x86/kernel/pci-swiotlb.c
+++ b/arch/x86/kernel/pci-swiotlb.c
@@ -42,10 +42,8 @@ IOMMU_INIT_FINISH(pci_swiotlb_detect_override,
 int __init pci_swiotlb_detect_4gb(void)
 {
 	/* don't initialize swiotlb if iommu=off (no_iommu=1) */
-#ifdef CONFIG_X86_64
 	if (!no_iommu && max_possible_pfn > MAX_DMA32_PFN)
 		swiotlb = 1;
-#endif
 
 	/*
 	 * If SME is active then swiotlb will be set to 1 so that bounce
diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c
index be01328eb755..fddaefc51fb6 100644
--- a/arch/x86/kernel/time.c
+++ b/arch/x86/kernel/time.c
@@ -25,7 +25,7 @@
 #include <asm/time.h>
 
 #ifdef CONFIG_X86_64
-__visible volatile unsigned long jiffies __cacheline_aligned = INITIAL_JIFFIES;
+__visible volatile unsigned long jiffies __cacheline_aligned_in_smp = INITIAL_JIFFIES;
 #endif
 
 unsigned long profile_pc(struct pt_regs *regs)
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index b52bd2b6cdb4..6d5dc5dabfd7 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -58,7 +58,7 @@ struct cyc2ns {
 
 static DEFINE_PER_CPU_ALIGNED(struct cyc2ns, cyc2ns);
 
-void cyc2ns_read_begin(struct cyc2ns_data *data)
+void __always_inline cyc2ns_read_begin(struct cyc2ns_data *data)
 {
 	int seq, idx;
 
@@ -75,7 +75,7 @@ void cyc2ns_read_begin(struct cyc2ns_data *data)
 	} while (unlikely(seq != this_cpu_read(cyc2ns.seq.sequence)));
 }
 
-void cyc2ns_read_end(void)
+void __always_inline cyc2ns_read_end(void)
 {
 	preempt_enable_notrace();
 }
@@ -104,7 +104,7 @@ void cyc2ns_read_end(void)
  *                      -johnstul@us.ibm.com "math is hard, lets go shopping!"
  */
 
-static inline unsigned long long cycles_2_ns(unsigned long long cyc)
+static __always_inline unsigned long long cycles_2_ns(unsigned long long cyc)
 {
 	struct cyc2ns_data data;
 	unsigned long long ns;

^ permalink raw reply	[flat|nested] 539+ messages in thread

* Re: [GIT PULL] x86 fixes
  2018-10-11  9:14 Ingo Molnar
@ 2018-10-11 12:32 ` Greg Kroah-Hartman
  0 siblings, 0 replies; 539+ messages in thread
From: Greg Kroah-Hartman @ 2018-10-11 12:32 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: linux-kernel, Linus Torvalds, Thomas Gleixner, Borislav Petkov,
	Peter Zijlstra, Andrew Morton

On Thu, Oct 11, 2018 at 11:14:16AM +0200, Ingo Molnar wrote:
> Greg,
> 
> Please pull the latest x86-urgent-for-linus git tree from:
> 
>    git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus

Now merged, thanks.

greg k-h

^ permalink raw reply	[flat|nested] 539+ messages in thread

* [GIT PULL] x86 fixes
@ 2018-10-11  9:14 Ingo Molnar
  2018-10-11 12:32 ` Greg Kroah-Hartman
  0 siblings, 1 reply; 539+ messages in thread
From: Ingo Molnar @ 2018-10-11  9:14 UTC (permalink / raw)
  To: Greg Kroah-Hartman
  Cc: linux-kernel, Linus Torvalds, Thomas Gleixner, Borislav Petkov,
	Peter Zijlstra, Andrew Morton

Greg,

Please pull the latest x86-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus

   # HEAD: 184d47f0fd365108bd06ab26cdb3450b716269fd x86/mm: Avoid VLA in pgd_alloc()

An intel_rdt memory access fix and a VLA fix in pgd_alloc().

 Thanks,

	Ingo

------------------>
Kees Cook (1):
      x86/mm: Avoid VLA in pgd_alloc()

Reinette Chatre (1):
      x86/intel_rdt: Fix out-of-bounds memory access in CBM tests


 arch/x86/kernel/cpu/intel_rdt.h             |  6 ++---
 arch/x86/kernel/cpu/intel_rdt_pseudo_lock.c | 20 ++++++++--------
 arch/x86/kernel/cpu/intel_rdt_rdtgroup.c    | 36 ++++++++++++++++++-----------
 arch/x86/mm/pgtable.c                       | 10 ++++++--
 4 files changed, 45 insertions(+), 27 deletions(-)

diff --git a/arch/x86/kernel/cpu/intel_rdt.h b/arch/x86/kernel/cpu/intel_rdt.h
index 285eb3ec4200..3736f6dc9545 100644
--- a/arch/x86/kernel/cpu/intel_rdt.h
+++ b/arch/x86/kernel/cpu/intel_rdt.h
@@ -529,14 +529,14 @@ ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of,
 int rdtgroup_schemata_show(struct kernfs_open_file *of,
 			   struct seq_file *s, void *v);
 bool rdtgroup_cbm_overlaps(struct rdt_resource *r, struct rdt_domain *d,
-			   u32 _cbm, int closid, bool exclusive);
+			   unsigned long cbm, int closid, bool exclusive);
 unsigned int rdtgroup_cbm_to_size(struct rdt_resource *r, struct rdt_domain *d,
-				  u32 cbm);
+				  unsigned long cbm);
 enum rdtgrp_mode rdtgroup_mode_by_closid(int closid);
 int rdtgroup_tasks_assigned(struct rdtgroup *r);
 int rdtgroup_locksetup_enter(struct rdtgroup *rdtgrp);
 int rdtgroup_locksetup_exit(struct rdtgroup *rdtgrp);
-bool rdtgroup_cbm_overlaps_pseudo_locked(struct rdt_domain *d, u32 _cbm);
+bool rdtgroup_cbm_overlaps_pseudo_locked(struct rdt_domain *d, unsigned long cbm);
 bool rdtgroup_pseudo_locked_in_hierarchy(struct rdt_domain *d);
 int rdt_pseudo_lock_init(void);
 void rdt_pseudo_lock_release(void);
diff --git a/arch/x86/kernel/cpu/intel_rdt_pseudo_lock.c b/arch/x86/kernel/cpu/intel_rdt_pseudo_lock.c
index 40f3903ae5d9..f8c260d522ca 100644
--- a/arch/x86/kernel/cpu/intel_rdt_pseudo_lock.c
+++ b/arch/x86/kernel/cpu/intel_rdt_pseudo_lock.c
@@ -797,25 +797,27 @@ int rdtgroup_locksetup_exit(struct rdtgroup *rdtgrp)
 /**
  * rdtgroup_cbm_overlaps_pseudo_locked - Test if CBM or portion is pseudo-locked
  * @d: RDT domain
- * @_cbm: CBM to test
+ * @cbm: CBM to test
  *
- * @d represents a cache instance and @_cbm a capacity bitmask that is
- * considered for it. Determine if @_cbm overlaps with any existing
+ * @d represents a cache instance and @cbm a capacity bitmask that is
+ * considered for it. Determine if @cbm overlaps with any existing
  * pseudo-locked region on @d.
  *
- * Return: true if @_cbm overlaps with pseudo-locked region on @d, false
+ * @cbm is unsigned long, even if only 32 bits are used, to make the
+ * bitmap functions work correctly.
+ *
+ * Return: true if @cbm overlaps with pseudo-locked region on @d, false
  * otherwise.
  */
-bool rdtgroup_cbm_overlaps_pseudo_locked(struct rdt_domain *d, u32 _cbm)
+bool rdtgroup_cbm_overlaps_pseudo_locked(struct rdt_domain *d, unsigned long cbm)
 {
-	unsigned long *cbm = (unsigned long *)&_cbm;
-	unsigned long *cbm_b;
 	unsigned int cbm_len;
+	unsigned long cbm_b;
 
 	if (d->plr) {
 		cbm_len = d->plr->r->cache.cbm_len;
-		cbm_b = (unsigned long *)&d->plr->cbm;
-		if (bitmap_intersects(cbm, cbm_b, cbm_len))
+		cbm_b = d->plr->cbm;
+		if (bitmap_intersects(&cbm, &cbm_b, cbm_len))
 			return true;
 	}
 	return false;
diff --git a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
index 1b8e86a5d5e1..b140c68bc14b 100644
--- a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
+++ b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
@@ -975,33 +975,34 @@ static int rdtgroup_mode_show(struct kernfs_open_file *of,
  * is false then overlaps with any resource group or hardware entities
  * will be considered.
  *
+ * @cbm is unsigned long, even if only 32 bits are used, to make the
+ * bitmap functions work correctly.
+ *
  * Return: false if CBM does not overlap, true if it does.
  */
 bool rdtgroup_cbm_overlaps(struct rdt_resource *r, struct rdt_domain *d,
-			   u32 _cbm, int closid, bool exclusive)
+			   unsigned long cbm, int closid, bool exclusive)
 {
-	unsigned long *cbm = (unsigned long *)&_cbm;
-	unsigned long *ctrl_b;
 	enum rdtgrp_mode mode;
+	unsigned long ctrl_b;
 	u32 *ctrl;
 	int i;
 
 	/* Check for any overlap with regions used by hardware directly */
 	if (!exclusive) {
-		if (bitmap_intersects(cbm,
-				      (unsigned long *)&r->cache.shareable_bits,
-				      r->cache.cbm_len))
+		ctrl_b = r->cache.shareable_bits;
+		if (bitmap_intersects(&cbm, &ctrl_b, r->cache.cbm_len))
 			return true;
 	}
 
 	/* Check for overlap with other resource groups */
 	ctrl = d->ctrl_val;
 	for (i = 0; i < closids_supported(); i++, ctrl++) {
-		ctrl_b = (unsigned long *)ctrl;
+		ctrl_b = *ctrl;
 		mode = rdtgroup_mode_by_closid(i);
 		if (closid_allocated(i) && i != closid &&
 		    mode != RDT_MODE_PSEUDO_LOCKSETUP) {
-			if (bitmap_intersects(cbm, ctrl_b, r->cache.cbm_len)) {
+			if (bitmap_intersects(&cbm, &ctrl_b, r->cache.cbm_len)) {
 				if (exclusive) {
 					if (mode == RDT_MODE_EXCLUSIVE)
 						return true;
@@ -1138,15 +1139,18 @@ static ssize_t rdtgroup_mode_write(struct kernfs_open_file *of,
  * computed by first dividing the total cache size by the CBM length to
  * determine how many bytes each bit in the bitmask represents. The result
  * is multiplied with the number of bits set in the bitmask.
+ *
+ * @cbm is unsigned long, even if only 32 bits are used to make the
+ * bitmap functions work correctly.
  */
 unsigned int rdtgroup_cbm_to_size(struct rdt_resource *r,
-				  struct rdt_domain *d, u32 cbm)
+				  struct rdt_domain *d, unsigned long cbm)
 {
 	struct cpu_cacheinfo *ci;
 	unsigned int size = 0;
 	int num_b, i;
 
-	num_b = bitmap_weight((unsigned long *)&cbm, r->cache.cbm_len);
+	num_b = bitmap_weight(&cbm, r->cache.cbm_len);
 	ci = get_cpu_cacheinfo(cpumask_any(&d->cpu_mask));
 	for (i = 0; i < ci->num_leaves; i++) {
 		if (ci->info_list[i].level == r->cache_level) {
@@ -2353,6 +2357,7 @@ static int rdtgroup_init_alloc(struct rdtgroup *rdtgrp)
 	u32 used_b = 0, unused_b = 0;
 	u32 closid = rdtgrp->closid;
 	struct rdt_resource *r;
+	unsigned long tmp_cbm;
 	enum rdtgrp_mode mode;
 	struct rdt_domain *d;
 	int i, ret;
@@ -2390,9 +2395,14 @@ static int rdtgroup_init_alloc(struct rdtgroup *rdtgrp)
 			 * modify the CBM based on system availability.
 			 */
 			cbm_ensure_valid(&d->new_ctrl, r);
-			if (bitmap_weight((unsigned long *) &d->new_ctrl,
-					  r->cache.cbm_len) <
-					r->cache.min_cbm_bits) {
+			/*
+			 * Assign the u32 CBM to an unsigned long to ensure
+			 * that bitmap_weight() does not access out-of-bound
+			 * memory.
+			 */
+			tmp_cbm = d->new_ctrl;
+			if (bitmap_weight(&tmp_cbm, r->cache.cbm_len) <
+			    r->cache.min_cbm_bits) {
 				rdt_last_cmd_printf("no space on %s:%d\n",
 						    r->name, d->id);
 				return -ENOSPC;
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 089e78c4effd..59274e2c1ac4 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -115,6 +115,8 @@ static inline void pgd_list_del(pgd_t *pgd)
 
 #define UNSHARED_PTRS_PER_PGD				\
 	(SHARED_KERNEL_PMD ? KERNEL_PGD_BOUNDARY : PTRS_PER_PGD)
+#define MAX_UNSHARED_PTRS_PER_PGD			\
+	max_t(size_t, KERNEL_PGD_BOUNDARY, PTRS_PER_PGD)
 
 
 static void pgd_set_mm(pgd_t *pgd, struct mm_struct *mm)
@@ -181,6 +183,7 @@ static void pgd_dtor(pgd_t *pgd)
  * and initialize the kernel pmds here.
  */
 #define PREALLOCATED_PMDS	UNSHARED_PTRS_PER_PGD
+#define MAX_PREALLOCATED_PMDS	MAX_UNSHARED_PTRS_PER_PGD
 
 /*
  * We allocate separate PMDs for the kernel part of the user page-table
@@ -189,6 +192,7 @@ static void pgd_dtor(pgd_t *pgd)
  */
 #define PREALLOCATED_USER_PMDS	 (static_cpu_has(X86_FEATURE_PTI) ? \
 					KERNEL_PGD_PTRS : 0)
+#define MAX_PREALLOCATED_USER_PMDS KERNEL_PGD_PTRS
 
 void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmd)
 {
@@ -210,7 +214,9 @@ void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmd)
 
 /* No need to prepopulate any pagetable entries in non-PAE modes. */
 #define PREALLOCATED_PMDS	0
+#define MAX_PREALLOCATED_PMDS	0
 #define PREALLOCATED_USER_PMDS	 0
+#define MAX_PREALLOCATED_USER_PMDS 0
 #endif	/* CONFIG_X86_PAE */
 
 static void free_pmds(struct mm_struct *mm, pmd_t *pmds[], int count)
@@ -428,8 +434,8 @@ static inline void _pgd_free(pgd_t *pgd)
 pgd_t *pgd_alloc(struct mm_struct *mm)
 {
 	pgd_t *pgd;
-	pmd_t *u_pmds[PREALLOCATED_USER_PMDS];
-	pmd_t *pmds[PREALLOCATED_PMDS];
+	pmd_t *u_pmds[MAX_PREALLOCATED_USER_PMDS];
+	pmd_t *pmds[MAX_PREALLOCATED_PMDS];
 
 	pgd = _pgd_alloc();
 

^ permalink raw reply	[flat|nested] 539+ messages in thread

* Re: [GIT PULL] x86 fixes
  2018-10-05  9:53 Ingo Molnar
@ 2018-10-05 23:06 ` Greg Kroah-Hartman
  0 siblings, 0 replies; 539+ messages in thread
From: Greg Kroah-Hartman @ 2018-10-05 23:06 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: linux-kernel, Linus Torvalds, Thomas Gleixner, Borislav Petkov,
	Peter Zijlstra, Andrew Morton, Andy Lutomirski

On Fri, Oct 05, 2018 at 11:53:54AM +0200, Ingo Molnar wrote:
> Greg,
> 
> Please pull the latest x86-urgent-for-linus git tree from:
> 
>    git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus

Now merged, thanks.

greg k-h

^ permalink raw reply	[flat|nested] 539+ messages in thread

* [GIT PULL] x86 fixes
@ 2018-10-05  9:53 Ingo Molnar
  2018-10-05 23:06 ` Greg Kroah-Hartman
  0 siblings, 1 reply; 539+ messages in thread
From: Ingo Molnar @ 2018-10-05  9:53 UTC (permalink / raw)
  To: Greg Kroah-Hartman
  Cc: linux-kernel, Linus Torvalds, Thomas Gleixner, Borislav Petkov,
	Peter Zijlstra, Andrew Morton, Andy Lutomirski

Greg,

Please pull the latest x86-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus

   # HEAD: 02e425668f5c9deb42787d10001a3b605993ad15 x86/vdso: Fix vDSO syscall fallback asm constraint regression

Misc fixes:

 - fix various vDSO bugs: asm constraints and retpolines
 - add vDSO test units to make sure they never re-appear
 - fix UV platform TSC initialization bug
 - fix build warning on Clang

 Thanks,

	Ingo

------------------>
Andy Lutomirski (4):
      x86/vdso: Fix asm constraints on vDSO syscall fallbacks
      selftests/x86: Add clock_gettime() tests to test_vdso
      x86/vdso: Only enable vDSO retpolines when enabled and supported
      x86/vdso: Fix vDSO syscall fallback asm constraint regression

Mike Travis (2):
      x86/platform/uv: Provide is_early_uv_system()
      x86/tsc: Fix UV TSC initialization

Nathan Chancellor (1):
      x86/cpu/amd: Remove unnecessary parentheses


 arch/x86/entry/vdso/Makefile            |  16 ++-
 arch/x86/entry/vdso/vclock_gettime.c    |  26 ++---
 arch/x86/include/asm/uv/uv.h            |   6 ++
 arch/x86/kernel/cpu/amd.c               |   2 +-
 arch/x86/kernel/tsc.c                   |   4 +
 tools/testing/selftests/x86/test_vdso.c | 172 ++++++++++++++++++++++++++++++++
 6 files changed, 211 insertions(+), 15 deletions(-)

diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile
index fa3f439f0a92..141d415a8c80 100644
--- a/arch/x86/entry/vdso/Makefile
+++ b/arch/x86/entry/vdso/Makefile
@@ -68,7 +68,13 @@ $(obj)/vdso-image-%.c: $(obj)/vdso%.so.dbg $(obj)/vdso%.so $(obj)/vdso2c FORCE
 CFL := $(PROFILING) -mcmodel=small -fPIC -O2 -fasynchronous-unwind-tables -m64 \
        $(filter -g%,$(KBUILD_CFLAGS)) $(call cc-option, -fno-stack-protector) \
        -fno-omit-frame-pointer -foptimize-sibling-calls \
-       -DDISABLE_BRANCH_PROFILING -DBUILD_VDSO $(RETPOLINE_VDSO_CFLAGS)
+       -DDISABLE_BRANCH_PROFILING -DBUILD_VDSO
+
+ifdef CONFIG_RETPOLINE
+ifneq ($(RETPOLINE_VDSO_CFLAGS),)
+  CFL += $(RETPOLINE_VDSO_CFLAGS)
+endif
+endif
 
 $(vobjs): KBUILD_CFLAGS := $(filter-out $(GCC_PLUGINS_CFLAGS) $(RETPOLINE_CFLAGS),$(KBUILD_CFLAGS)) $(CFL)
 
@@ -138,7 +144,13 @@ KBUILD_CFLAGS_32 += $(call cc-option, -fno-stack-protector)
 KBUILD_CFLAGS_32 += $(call cc-option, -foptimize-sibling-calls)
 KBUILD_CFLAGS_32 += -fno-omit-frame-pointer
 KBUILD_CFLAGS_32 += -DDISABLE_BRANCH_PROFILING
-KBUILD_CFLAGS_32 += $(RETPOLINE_VDSO_CFLAGS)
+
+ifdef CONFIG_RETPOLINE
+ifneq ($(RETPOLINE_VDSO_CFLAGS),)
+  KBUILD_CFLAGS_32 += $(RETPOLINE_VDSO_CFLAGS)
+endif
+endif
+
 $(obj)/vdso32.so.dbg: KBUILD_CFLAGS = $(KBUILD_CFLAGS_32)
 
 $(obj)/vdso32.so.dbg: FORCE \
diff --git a/arch/x86/entry/vdso/vclock_gettime.c b/arch/x86/entry/vdso/vclock_gettime.c
index f19856d95c60..e48ca3afa091 100644
--- a/arch/x86/entry/vdso/vclock_gettime.c
+++ b/arch/x86/entry/vdso/vclock_gettime.c
@@ -43,8 +43,9 @@ extern u8 hvclock_page
 notrace static long vdso_fallback_gettime(long clock, struct timespec *ts)
 {
 	long ret;
-	asm("syscall" : "=a" (ret) :
-	    "0" (__NR_clock_gettime), "D" (clock), "S" (ts) : "memory");
+	asm ("syscall" : "=a" (ret), "=m" (*ts) :
+	     "0" (__NR_clock_gettime), "D" (clock), "S" (ts) :
+	     "memory", "rcx", "r11");
 	return ret;
 }
 
@@ -52,8 +53,9 @@ notrace static long vdso_fallback_gtod(struct timeval *tv, struct timezone *tz)
 {
 	long ret;
 
-	asm("syscall" : "=a" (ret) :
-	    "0" (__NR_gettimeofday), "D" (tv), "S" (tz) : "memory");
+	asm ("syscall" : "=a" (ret), "=m" (*tv), "=m" (*tz) :
+	     "0" (__NR_gettimeofday), "D" (tv), "S" (tz) :
+	     "memory", "rcx", "r11");
 	return ret;
 }
 
@@ -64,13 +66,13 @@ notrace static long vdso_fallback_gettime(long clock, struct timespec *ts)
 {
 	long ret;
 
-	asm(
+	asm (
 		"mov %%ebx, %%edx \n"
-		"mov %2, %%ebx \n"
+		"mov %[clock], %%ebx \n"
 		"call __kernel_vsyscall \n"
 		"mov %%edx, %%ebx \n"
-		: "=a" (ret)
-		: "0" (__NR_clock_gettime), "g" (clock), "c" (ts)
+		: "=a" (ret), "=m" (*ts)
+		: "0" (__NR_clock_gettime), [clock] "g" (clock), "c" (ts)
 		: "memory", "edx");
 	return ret;
 }
@@ -79,13 +81,13 @@ notrace static long vdso_fallback_gtod(struct timeval *tv, struct timezone *tz)
 {
 	long ret;
 
-	asm(
+	asm (
 		"mov %%ebx, %%edx \n"
-		"mov %2, %%ebx \n"
+		"mov %[tv], %%ebx \n"
 		"call __kernel_vsyscall \n"
 		"mov %%edx, %%ebx \n"
-		: "=a" (ret)
-		: "0" (__NR_gettimeofday), "g" (tv), "c" (tz)
+		: "=a" (ret), "=m" (*tv), "=m" (*tz)
+		: "0" (__NR_gettimeofday), [tv] "g" (tv), "c" (tz)
 		: "memory", "edx");
 	return ret;
 }
diff --git a/arch/x86/include/asm/uv/uv.h b/arch/x86/include/asm/uv/uv.h
index a80c0673798f..e60c45fd3679 100644
--- a/arch/x86/include/asm/uv/uv.h
+++ b/arch/x86/include/asm/uv/uv.h
@@ -10,8 +10,13 @@ struct cpumask;
 struct mm_struct;
 
 #ifdef CONFIG_X86_UV
+#include <linux/efi.h>
 
 extern enum uv_system_type get_uv_system_type(void);
+static inline bool is_early_uv_system(void)
+{
+	return !((efi.uv_systab == EFI_INVALID_TABLE_ADDR) || !efi.uv_systab);
+}
 extern int is_uv_system(void);
 extern int is_uv_hubless(void);
 extern void uv_cpu_init(void);
@@ -23,6 +28,7 @@ extern const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
 #else	/* X86_UV */
 
 static inline enum uv_system_type get_uv_system_type(void) { return UV_NONE; }
+static inline bool is_early_uv_system(void)	{ return 0; }
 static inline int is_uv_system(void)	{ return 0; }
 static inline int is_uv_hubless(void)	{ return 0; }
 static inline void uv_cpu_init(void)	{ }
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 22ab408177b2..eeea634bee0a 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -922,7 +922,7 @@ static void init_amd(struct cpuinfo_x86 *c)
 static unsigned int amd_size_cache(struct cpuinfo_x86 *c, unsigned int size)
 {
 	/* AMD errata T13 (order #21922) */
-	if ((c->x86 == 6)) {
+	if (c->x86 == 6) {
 		/* Duron Rev A0 */
 		if (c->x86_model == 3 && c->x86_stepping == 0)
 			size = 64;
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 6490f618e096..b52bd2b6cdb4 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -26,6 +26,7 @@
 #include <asm/apic.h>
 #include <asm/intel-family.h>
 #include <asm/i8259.h>
+#include <asm/uv/uv.h>
 
 unsigned int __read_mostly cpu_khz;	/* TSC clocks / usec, not used here */
 EXPORT_SYMBOL(cpu_khz);
@@ -1433,6 +1434,9 @@ void __init tsc_early_init(void)
 {
 	if (!boot_cpu_has(X86_FEATURE_TSC))
 		return;
+	/* Don't change UV TSC multi-chassis synchronization */
+	if (is_early_uv_system())
+		return;
 	if (!determine_cpu_tsc_frequencies(true))
 		return;
 	loops_per_jiffy = get_loops_per_jiffy();
diff --git a/tools/testing/selftests/x86/test_vdso.c b/tools/testing/selftests/x86/test_vdso.c
index 235259011704..35edd61d1663 100644
--- a/tools/testing/selftests/x86/test_vdso.c
+++ b/tools/testing/selftests/x86/test_vdso.c
@@ -17,6 +17,7 @@
 #include <errno.h>
 #include <sched.h>
 #include <stdbool.h>
+#include <limits.h>
 
 #ifndef SYS_getcpu
 # ifdef __x86_64__
@@ -31,6 +32,14 @@
 
 int nerrs = 0;
 
+typedef int (*vgettime_t)(clockid_t, struct timespec *);
+
+vgettime_t vdso_clock_gettime;
+
+typedef long (*vgtod_t)(struct timeval *tv, struct timezone *tz);
+
+vgtod_t vdso_gettimeofday;
+
 typedef long (*getcpu_t)(unsigned *, unsigned *, void *);
 
 getcpu_t vgetcpu;
@@ -95,6 +104,15 @@ static void fill_function_pointers()
 		printf("Warning: failed to find getcpu in vDSO\n");
 
 	vgetcpu = (getcpu_t) vsyscall_getcpu();
+
+	vdso_clock_gettime = (vgettime_t)dlsym(vdso, "__vdso_clock_gettime");
+	if (!vdso_clock_gettime)
+		printf("Warning: failed to find clock_gettime in vDSO\n");
+
+	vdso_gettimeofday = (vgtod_t)dlsym(vdso, "__vdso_gettimeofday");
+	if (!vdso_gettimeofday)
+		printf("Warning: failed to find gettimeofday in vDSO\n");
+
 }
 
 static long sys_getcpu(unsigned * cpu, unsigned * node,
@@ -103,6 +121,16 @@ static long sys_getcpu(unsigned * cpu, unsigned * node,
 	return syscall(__NR_getcpu, cpu, node, cache);
 }
 
+static inline int sys_clock_gettime(clockid_t id, struct timespec *ts)
+{
+	return syscall(__NR_clock_gettime, id, ts);
+}
+
+static inline int sys_gettimeofday(struct timeval *tv, struct timezone *tz)
+{
+	return syscall(__NR_gettimeofday, tv, tz);
+}
+
 static void test_getcpu(void)
 {
 	printf("[RUN]\tTesting getcpu...\n");
@@ -155,10 +183,154 @@ static void test_getcpu(void)
 	}
 }
 
+static bool ts_leq(const struct timespec *a, const struct timespec *b)
+{
+	if (a->tv_sec != b->tv_sec)
+		return a->tv_sec < b->tv_sec;
+	else
+		return a->tv_nsec <= b->tv_nsec;
+}
+
+static bool tv_leq(const struct timeval *a, const struct timeval *b)
+{
+	if (a->tv_sec != b->tv_sec)
+		return a->tv_sec < b->tv_sec;
+	else
+		return a->tv_usec <= b->tv_usec;
+}
+
+static char const * const clocknames[] = {
+	[0] = "CLOCK_REALTIME",
+	[1] = "CLOCK_MONOTONIC",
+	[2] = "CLOCK_PROCESS_CPUTIME_ID",
+	[3] = "CLOCK_THREAD_CPUTIME_ID",
+	[4] = "CLOCK_MONOTONIC_RAW",
+	[5] = "CLOCK_REALTIME_COARSE",
+	[6] = "CLOCK_MONOTONIC_COARSE",
+	[7] = "CLOCK_BOOTTIME",
+	[8] = "CLOCK_REALTIME_ALARM",
+	[9] = "CLOCK_BOOTTIME_ALARM",
+	[10] = "CLOCK_SGI_CYCLE",
+	[11] = "CLOCK_TAI",
+};
+
+static void test_one_clock_gettime(int clock, const char *name)
+{
+	struct timespec start, vdso, end;
+	int vdso_ret, end_ret;
+
+	printf("[RUN]\tTesting clock_gettime for clock %s (%d)...\n", name, clock);
+
+	if (sys_clock_gettime(clock, &start) < 0) {
+		if (errno == EINVAL) {
+			vdso_ret = vdso_clock_gettime(clock, &vdso);
+			if (vdso_ret == -EINVAL) {
+				printf("[OK]\tNo such clock.\n");
+			} else {
+				printf("[FAIL]\tNo such clock, but __vdso_clock_gettime returned %d\n", vdso_ret);
+				nerrs++;
+			}
+		} else {
+			printf("[WARN]\t clock_gettime(%d) syscall returned error %d\n", clock, errno);
+		}
+		return;
+	}
+
+	vdso_ret = vdso_clock_gettime(clock, &vdso);
+	end_ret = sys_clock_gettime(clock, &end);
+
+	if (vdso_ret != 0 || end_ret != 0) {
+		printf("[FAIL]\tvDSO returned %d, syscall errno=%d\n",
+		       vdso_ret, errno);
+		nerrs++;
+		return;
+	}
+
+	printf("\t%llu.%09ld %llu.%09ld %llu.%09ld\n",
+	       (unsigned long long)start.tv_sec, start.tv_nsec,
+	       (unsigned long long)vdso.tv_sec, vdso.tv_nsec,
+	       (unsigned long long)end.tv_sec, end.tv_nsec);
+
+	if (!ts_leq(&start, &vdso) || !ts_leq(&vdso, &end)) {
+		printf("[FAIL]\tTimes are out of sequence\n");
+		nerrs++;
+	}
+}
+
+static void test_clock_gettime(void)
+{
+	for (int clock = 0; clock < sizeof(clocknames) / sizeof(clocknames[0]);
+	     clock++) {
+		test_one_clock_gettime(clock, clocknames[clock]);
+	}
+
+	/* Also test some invalid clock ids */
+	test_one_clock_gettime(-1, "invalid");
+	test_one_clock_gettime(INT_MIN, "invalid");
+	test_one_clock_gettime(INT_MAX, "invalid");
+}
+
+static void test_gettimeofday(void)
+{
+	struct timeval start, vdso, end;
+	struct timezone sys_tz, vdso_tz;
+	int vdso_ret, end_ret;
+
+	if (!vdso_gettimeofday)
+		return;
+
+	printf("[RUN]\tTesting gettimeofday...\n");
+
+	if (sys_gettimeofday(&start, &sys_tz) < 0) {
+		printf("[FAIL]\tsys_gettimeofday failed (%d)\n", errno);
+		nerrs++;
+		return;
+	}
+
+	vdso_ret = vdso_gettimeofday(&vdso, &vdso_tz);
+	end_ret = sys_gettimeofday(&end, NULL);
+
+	if (vdso_ret != 0 || end_ret != 0) {
+		printf("[FAIL]\tvDSO returned %d, syscall errno=%d\n",
+		       vdso_ret, errno);
+		nerrs++;
+		return;
+	}
+
+	printf("\t%llu.%06ld %llu.%06ld %llu.%06ld\n",
+	       (unsigned long long)start.tv_sec, start.tv_usec,
+	       (unsigned long long)vdso.tv_sec, vdso.tv_usec,
+	       (unsigned long long)end.tv_sec, end.tv_usec);
+
+	if (!tv_leq(&start, &vdso) || !tv_leq(&vdso, &end)) {
+		printf("[FAIL]\tTimes are out of sequence\n");
+		nerrs++;
+	}
+
+	if (sys_tz.tz_minuteswest == vdso_tz.tz_minuteswest &&
+	    sys_tz.tz_dsttime == vdso_tz.tz_dsttime) {
+		printf("[OK]\ttimezones match: minuteswest=%d, dsttime=%d\n",
+		       sys_tz.tz_minuteswest, sys_tz.tz_dsttime);
+	} else {
+		printf("[FAIL]\ttimezones do not match\n");
+		nerrs++;
+	}
+
+	/* And make sure that passing NULL for tz doesn't crash. */
+	vdso_gettimeofday(&vdso, NULL);
+}
+
 int main(int argc, char **argv)
 {
 	fill_function_pointers();
 
+	test_clock_gettime();
+	test_gettimeofday();
+
+	/*
+	 * Test getcpu() last so that, if something goes wrong setting affinity,
+	 * we still run the other tests.
+	 */
 	test_getcpu();
 
 	return nerrs ? 1 : 0;

^ permalink raw reply	[flat|nested] 539+ messages in thread

* [GIT PULL] x86 fixes
@ 2018-09-15 13:24 Ingo Molnar
  0 siblings, 0 replies; 539+ messages in thread
From: Ingo Molnar @ 2018-09-15 13:24 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Thomas Gleixner, Peter Zijlstra, Andrew Morton

Linus,

Please pull the latest x86-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus

   # HEAD: 002b87d2aace62b4f3841c3aa43309d2380092be x86/APM: Fix build warning when PROC_FS is not enabled

Misc fixes:

 - EFI crash fix
 - Xen PV fixes
 - do not allow PTI on 2-level 32-bit kernels for now
 - documentation fix

 Thanks,

	Ingo

------------------>
Boris Ostrovsky (1):
      x86/EISA: Don't probe EISA bus for Xen PV guests

Guenter Roeck (1):
      x86/efi: Load fixmap GDT in efi_call_phys_epilog() before setting %cr3

Joerg Roedel (1):
      Revert "x86/mm/legacy: Populate the user page-table with user pgd's"

Juergen Gross (1):
      x86/xen: Disable CPU0 hotplug for Xen PV

Randy Dunlap (2):
      x86/doc: Fix Documentation/x86/earlyprintk.txt
      x86/APM: Fix build warning when PROC_FS is not enabled


 Documentation/x86/earlyprintk.txt     | 25 +++++++++++++++----------
 arch/x86/include/asm/pgtable-2level.h |  9 ---------
 arch/x86/kernel/apm_32.c              |  2 ++
 arch/x86/kernel/eisa.c                | 10 ++++++++--
 arch/x86/kernel/topology.c            |  4 +++-
 arch/x86/platform/efi/efi_32.c        |  3 +--
 security/Kconfig                      |  2 +-
 7 files changed, 30 insertions(+), 25 deletions(-)

diff --git a/Documentation/x86/earlyprintk.txt b/Documentation/x86/earlyprintk.txt
index 688e3eeed21d..46933e06c972 100644
--- a/Documentation/x86/earlyprintk.txt
+++ b/Documentation/x86/earlyprintk.txt
@@ -35,25 +35,25 @@ and two USB cables, connected like this:
 ( If your system does not list a debug port capability then you probably
   won't be able to use the USB debug key. )
 
- b.) You also need a Netchip USB debug cable/key:
+ b.) You also need a NetChip USB debug cable/key:
 
         http://www.plxtech.com/products/NET2000/NET20DC/default.asp
 
-     This is a small blue plastic connector with two USB connections,
+     This is a small blue plastic connector with two USB connections;
      it draws power from its USB connections.
 
  c.) You need a second client/console system with a high speed USB 2.0
      port.
 
- d.) The Netchip device must be plugged directly into the physical
+ d.) The NetChip device must be plugged directly into the physical
      debug port on the "host/target" system.  You cannot use a USB hub in
      between the physical debug port and the "host/target" system.
 
      The EHCI debug controller is bound to a specific physical USB
-     port and the Netchip device will only work as an early printk
+     port and the NetChip device will only work as an early printk
      device in this port.  The EHCI host controllers are electrically
      wired such that the EHCI debug controller is hooked up to the
-     first physical and there is no way to change this via software.
+     first physical port and there is no way to change this via software.
      You can find the physical port through experimentation by trying
      each physical port on the system and rebooting.  Or you can try
      and use lsusb or look at the kernel info messages emitted by the
@@ -65,9 +65,9 @@ and two USB cables, connected like this:
      to the hardware vendor, because there is no reason not to wire
      this port into one of the physically accessible ports.
 
- e.) It is also important to note, that many versions of the Netchip
+ e.) It is also important to note, that many versions of the NetChip
      device require the "client/console" system to be plugged into the
-     right and side of the device (with the product logo facing up and
+     right hand side of the device (with the product logo facing up and
      readable left to right).  The reason being is that the 5 volt
      power supply is taken from only one side of the device and it
      must be the side that does not get rebooted.
@@ -81,13 +81,18 @@ and two USB cables, connected like this:
       CONFIG_EARLY_PRINTK_DBGP=y
 
     And you need to add the boot command line: "earlyprintk=dbgp".
+
     (If you are using Grub, append it to the 'kernel' line in
-     /etc/grub.conf)
+     /etc/grub.conf.  If you are using Grub2 on a BIOS firmware system,
+     append it to the 'linux' line in /boot/grub2/grub.cfg. If you are
+     using Grub2 on an EFI firmware system, append it to the 'linux'
+     or 'linuxefi' line in /boot/grub2/grub.cfg or
+     /boot/efi/EFI/<distro>/grub.cfg.)
 
     On systems with more than one EHCI debug controller you must
     specify the correct EHCI debug controller number.  The ordering
     comes from the PCI bus enumeration of the EHCI controllers.  The
-    default with no number argument is "0" the first EHCI debug
+    default with no number argument is "0" or the first EHCI debug
     controller.  To use the second EHCI debug controller, you would
     use the command line: "earlyprintk=dbgp1"
 
@@ -111,7 +116,7 @@ and two USB cables, connected like this:
     see the raw output.
 
  c.) On Nvidia Southbridge based systems: the kernel will try to probe
-     and find out which port has debug device connected.
+     and find out which port has a debug device connected.
 
 3. Testing that it works fine:
 
diff --git a/arch/x86/include/asm/pgtable-2level.h b/arch/x86/include/asm/pgtable-2level.h
index 24c6cf5f16b7..60d0f9015317 100644
--- a/arch/x86/include/asm/pgtable-2level.h
+++ b/arch/x86/include/asm/pgtable-2level.h
@@ -19,9 +19,6 @@ static inline void native_set_pte(pte_t *ptep , pte_t pte)
 
 static inline void native_set_pmd(pmd_t *pmdp, pmd_t pmd)
 {
-#ifdef CONFIG_PAGE_TABLE_ISOLATION
-	pmd.pud.p4d.pgd = pti_set_user_pgtbl(&pmdp->pud.p4d.pgd, pmd.pud.p4d.pgd);
-#endif
 	*pmdp = pmd;
 }
 
@@ -61,9 +58,6 @@ static inline pte_t native_ptep_get_and_clear(pte_t *xp)
 #ifdef CONFIG_SMP
 static inline pmd_t native_pmdp_get_and_clear(pmd_t *xp)
 {
-#ifdef CONFIG_PAGE_TABLE_ISOLATION
-	pti_set_user_pgtbl(&xp->pud.p4d.pgd, __pgd(0));
-#endif
 	return __pmd(xchg((pmdval_t *)xp, 0));
 }
 #else
@@ -73,9 +67,6 @@ static inline pmd_t native_pmdp_get_and_clear(pmd_t *xp)
 #ifdef CONFIG_SMP
 static inline pud_t native_pudp_get_and_clear(pud_t *xp)
 {
-#ifdef CONFIG_PAGE_TABLE_ISOLATION
-	pti_set_user_pgtbl(&xp->p4d.pgd, __pgd(0));
-#endif
 	return __pud(xchg((pudval_t *)xp, 0));
 }
 #else
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index ec00d1ff5098..f7151cd03cb0 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -1640,6 +1640,7 @@ static int do_open(struct inode *inode, struct file *filp)
 	return 0;
 }
 
+#ifdef CONFIG_PROC_FS
 static int proc_apm_show(struct seq_file *m, void *v)
 {
 	unsigned short	bx;
@@ -1719,6 +1720,7 @@ static int proc_apm_show(struct seq_file *m, void *v)
 		   units);
 	return 0;
 }
+#endif
 
 static int apm(void *unused)
 {
diff --git a/arch/x86/kernel/eisa.c b/arch/x86/kernel/eisa.c
index f260e452e4f8..e8c8c5d78dbd 100644
--- a/arch/x86/kernel/eisa.c
+++ b/arch/x86/kernel/eisa.c
@@ -7,11 +7,17 @@
 #include <linux/eisa.h>
 #include <linux/io.h>
 
+#include <xen/xen.h>
+
 static __init int eisa_bus_probe(void)
 {
-	void __iomem *p = ioremap(0x0FFFD9, 4);
+	void __iomem *p;
+
+	if (xen_pv_domain() && !xen_initial_domain())
+		return 0;
 
-	if (readl(p) == 'E' + ('I'<<8) + ('S'<<16) + ('A'<<24))
+	p = ioremap(0x0FFFD9, 4);
+	if (p && readl(p) == 'E' + ('I' << 8) + ('S' << 16) + ('A' << 24))
 		EISA_bus = 1;
 	iounmap(p);
 	return 0;
diff --git a/arch/x86/kernel/topology.c b/arch/x86/kernel/topology.c
index 12cbe2b88c0f..738bf42b0218 100644
--- a/arch/x86/kernel/topology.c
+++ b/arch/x86/kernel/topology.c
@@ -111,8 +111,10 @@ int arch_register_cpu(int num)
 	/*
 	 * Currently CPU0 is only hotpluggable on Intel platforms. Other
 	 * vendors can add hotplug support later.
+	 * Xen PV guests don't support CPU0 hotplug at all.
 	 */
-	if (c->x86_vendor != X86_VENDOR_INTEL)
+	if (c->x86_vendor != X86_VENDOR_INTEL ||
+	    boot_cpu_has(X86_FEATURE_XENPV))
 		cpu0_hotpluggable = 0;
 
 	/*
diff --git a/arch/x86/platform/efi/efi_32.c b/arch/x86/platform/efi/efi_32.c
index 05ca14222463..9959657127f4 100644
--- a/arch/x86/platform/efi/efi_32.c
+++ b/arch/x86/platform/efi/efi_32.c
@@ -85,10 +85,9 @@ pgd_t * __init efi_call_phys_prolog(void)
 
 void __init efi_call_phys_epilog(pgd_t *save_pgd)
 {
+	load_fixmap_gdt(0);
 	load_cr3(save_pgd);
 	__flush_tlb_all();
-
-	load_fixmap_gdt(0);
 }
 
 void __init efi_runtime_update_mappings(void)
diff --git a/security/Kconfig b/security/Kconfig
index 27d8b2688f75..d9aa521b5206 100644
--- a/security/Kconfig
+++ b/security/Kconfig
@@ -57,7 +57,7 @@ config SECURITY_NETWORK
 config PAGE_TABLE_ISOLATION
 	bool "Remove the kernel mapping in user mode"
 	default y
-	depends on X86 && !UML
+	depends on (X86_64 || X86_PAE) && !UML
 	help
 	  This feature reduces the number of hardware side channels by
 	  ensuring that the majority of kernel addresses are not mapped

^ permalink raw reply	[flat|nested] 539+ messages in thread

* [GIT PULL] x86 fixes
@ 2018-07-30 17:59 Ingo Molnar
  0 siblings, 0 replies; 539+ messages in thread
From: Ingo Molnar @ 2018-07-30 17:59 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Thomas Gleixner, Peter Zijlstra, Andrew Morton

Linus,

Please pull the latest x86-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus

   # HEAD: 92a4728608a8fd228c572bc8ff50dd98aa0ddf2a x86/boot: Fix if_changed build flip/flop bug

Misc fixes:

 - a build race fix
 - a Xen entry fix
 - a TSC_DEADLINE quirk future-proofing fix
 
 Thanks,

	Ingo

------------------>
Andy Lutomirski (1):
      x86/entry/64: Remove %ebx handling from error_entry/exit

Kees Cook (1):
      x86/boot: Fix if_changed build flip/flop bug

Len Brown (1):
      x86/apic: Future-proof the TSC_DEADLINE quirk for SKX


 arch/x86/boot/compressed/Makefile |  8 ++++++--
 arch/x86/entry/entry_64.S         | 18 ++++--------------
 arch/x86/kernel/apic/apic.c       |  3 +++
 3 files changed, 13 insertions(+), 16 deletions(-)

diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index fa42f895fdde..169c2feda14a 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -106,9 +106,13 @@ define cmd_check_data_rel
 	done
 endef
 
+# We need to run two commands under "if_changed", so merge them into a
+# single invocation.
+quiet_cmd_check-and-link-vmlinux = LD      $@
+      cmd_check-and-link-vmlinux = $(cmd_check_data_rel); $(cmd_ld)
+
 $(obj)/vmlinux: $(vmlinux-objs-y) FORCE
-	$(call if_changed,check_data_rel)
-	$(call if_changed,ld)
+	$(call if_changed,check-and-link-vmlinux)
 
 OBJCOPYFLAGS_vmlinux.bin :=  -R .comment -S
 $(obj)/vmlinux.bin: vmlinux FORCE
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 73a522d53b53..8ae7ffda8f98 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -981,7 +981,7 @@ ENTRY(\sym)
 
 	call	\do_sym
 
-	jmp	error_exit			/* %ebx: no swapgs flag */
+	jmp	error_exit
 	.endif
 END(\sym)
 .endm
@@ -1222,7 +1222,6 @@ END(paranoid_exit)
 
 /*
  * Save all registers in pt_regs, and switch GS if needed.
- * Return: EBX=0: came from user mode; EBX=1: otherwise
  */
 ENTRY(error_entry)
 	UNWIND_HINT_FUNC
@@ -1269,7 +1268,6 @@ ENTRY(error_entry)
 	 * for these here too.
 	 */
 .Lerror_kernelspace:
-	incl	%ebx
 	leaq	native_irq_return_iret(%rip), %rcx
 	cmpq	%rcx, RIP+8(%rsp)
 	je	.Lerror_bad_iret
@@ -1303,28 +1301,20 @@ ENTRY(error_entry)
 
 	/*
 	 * Pretend that the exception came from user mode: set up pt_regs
-	 * as if we faulted immediately after IRET and clear EBX so that
-	 * error_exit knows that we will be returning to user mode.
+	 * as if we faulted immediately after IRET.
 	 */
 	mov	%rsp, %rdi
 	call	fixup_bad_iret
 	mov	%rax, %rsp
-	decl	%ebx
 	jmp	.Lerror_entry_from_usermode_after_swapgs
 END(error_entry)
 
-
-/*
- * On entry, EBX is a "return to kernel mode" flag:
- *   1: already in kernel mode, don't need SWAPGS
- *   0: user gsbase is loaded, we need SWAPGS and standard preparation for return to usermode
- */
 ENTRY(error_exit)
 	UNWIND_HINT_REGS
 	DISABLE_INTERRUPTS(CLBR_ANY)
 	TRACE_IRQS_OFF
-	testl	%ebx, %ebx
-	jnz	retint_kernel
+	testb	$3, CS(%rsp)
+	jz	retint_kernel
 	jmp	retint_user
 END(error_exit)
 
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 2aabd4cb0e3f..adbda5847b14 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -573,6 +573,9 @@ static u32 skx_deadline_rev(void)
 	case 0x04: return 0x02000014;
 	}
 
+	if (boot_cpu_data.x86_stepping > 4)
+		return 0;
+
 	return ~0U;
 }
 

^ permalink raw reply	[flat|nested] 539+ messages in thread

* Re: [GIT PULL] x86 fixes
  2018-07-02 18:53     ` Linus Torvalds
@ 2018-07-03  7:56       ` Ingo Molnar
  0 siblings, 0 replies; 539+ messages in thread
From: Ingo Molnar @ 2018-07-03  7:56 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: Andy Lutomirski, Linux Kernel Mailing List, Thomas Gleixner,
	Peter Zijlstra, Andrew Morton


* Linus Torvalds <torvalds@linux-foundation.org> wrote:

> On Mon, Jul 2, 2018 at 11:48 AM Andy Lutomirski <luto@amacapital.net> wrote:
> >
> > BTR is way more leet than AND!
> 
> I stand corrected.

Ok, on that basis I won't try to convert it to AND ;-)

Seriously though, there's two other 32-bit prefix cleanup/micro-speedup changes 
I'll queue up later today:

  [PATCH v2] x86-64: use 32-bit XOR to zero registers
  [PATCH] x86/entry/64: add two more instruction suffixes

I'll Cc: you guys on the commits and maybe you can find something weird (or leet) 
in them as well.

Thanks,

	Ingo

^ permalink raw reply	[flat|nested] 539+ messages in thread

* Re: [GIT PULL] x86 fixes
  2018-07-02 18:47   ` Andy Lutomirski
@ 2018-07-02 18:53     ` Linus Torvalds
  2018-07-03  7:56       ` Ingo Molnar
  0 siblings, 1 reply; 539+ messages in thread
From: Linus Torvalds @ 2018-07-02 18:53 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: Ingo Molnar, Linux Kernel Mailing List, Thomas Gleixner,
	Peter Zijlstra, Andrew Morton

On Mon, Jul 2, 2018 at 11:48 AM Andy Lutomirski <luto@amacapital.net> wrote:
>
> BTR is way more leet than AND!

I stand corrected.

              Linus

^ permalink raw reply	[flat|nested] 539+ messages in thread

* Re: [GIT PULL] x86 fixes
  2018-06-30 19:01 ` Linus Torvalds
@ 2018-07-02 18:47   ` Andy Lutomirski
  2018-07-02 18:53     ` Linus Torvalds
  0 siblings, 1 reply; 539+ messages in thread
From: Andy Lutomirski @ 2018-07-02 18:47 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: Ingo Molnar, Linux Kernel Mailing List, Thomas Gleixner,
	Peter Zijlstra, Andrew Morton

On Sat, Jun 30, 2018 at 12:01 PM, Linus Torvalds
<torvalds@linux-foundation.org> wrote:
> On Sat, Jun 30, 2018 at 1:49 AM Ingo Molnar <mingo@kernel.org> wrote:
>>
>> --- a/arch/x86/entry/entry_32.S
>> +++ b/arch/x86/entry/entry_32.S
>> @@ -477,7 +477,7 @@ ENTRY(entry_SYSENTER_32)
>>          * whereas POPF does not.)
>>          */
>>         addl    $PT_EFLAGS-PT_DS, %esp  /* point esp at pt_regs->flags */
>> -       btr     $X86_EFLAGS_IF_BIT, (%esp)
>> +       btrl    $X86_EFLAGS_IF_BIT, (%esp)
>>         popfl
>
> Ho humm. Just looking at this patch, my reaction was "why isn't this
> an 'andl $~X86_EFLAGS_IF' instead"?
>
> Yeah, I guess the 'andl' is two bytes longer (due to the 32-bit
> constant - because IF is bit 9, you can't use a byte constant, and you
> don't want to get a partial word write just before the popfl).
>
> But btr is really pretty heavy operation for older CPU's (it's gotten
> better, but 32-bit code presumably cares more about the older CPUs).
>
> It really doesn't matter, I guess. The btr goes back to commit
> c2c9b52fab0d ("x86/entry/32: Restore FLAGS on SYSEXIT").
>
> Andy?
>

BTR is way more leet than AND!

Seriously, though, I've never really tried to shave cycles off the
32-bit code, and BTR is shorter, and I didn't spend more than about
one brain cycle thinking about it.  I guess that BTR has a more
complicated flags pipeline (the output flags depend on the input, not
just the output) and probably uses some more complicated ALU circuit
as compared to ANDL.

--Andy

^ permalink raw reply	[flat|nested] 539+ messages in thread

* Re: [GIT PULL] x86 fixes
  2018-06-30  8:49 Ingo Molnar
@ 2018-06-30 19:01 ` Linus Torvalds
  2018-07-02 18:47   ` Andy Lutomirski
  0 siblings, 1 reply; 539+ messages in thread
From: Linus Torvalds @ 2018-06-30 19:01 UTC (permalink / raw)
  To: Ingo Molnar, Andy Lutomirski
  Cc: Linux Kernel Mailing List, Thomas Gleixner, Peter Zijlstra,
	Andrew Morton

On Sat, Jun 30, 2018 at 1:49 AM Ingo Molnar <mingo@kernel.org> wrote:
>
> --- a/arch/x86/entry/entry_32.S
> +++ b/arch/x86/entry/entry_32.S
> @@ -477,7 +477,7 @@ ENTRY(entry_SYSENTER_32)
>          * whereas POPF does not.)
>          */
>         addl    $PT_EFLAGS-PT_DS, %esp  /* point esp at pt_regs->flags */
> -       btr     $X86_EFLAGS_IF_BIT, (%esp)
> +       btrl    $X86_EFLAGS_IF_BIT, (%esp)
>         popfl

Ho humm. Just looking at this patch, my reaction was "why isn't this
an 'andl $~X86_EFLAGS_IF' instead"?

Yeah, I guess the 'andl' is two bytes longer (due to the 32-bit
constant - because IF is bit 9, you can't use a byte constant, and you
don't want to get a partial word write just before the popfl).

But btr is really pretty heavy operation for older CPU's (it's gotten
better, but 32-bit code presumably cares more about the older CPUs).

It really doesn't matter, I guess. The btr goes back to commit
c2c9b52fab0d ("x86/entry/32: Restore FLAGS on SYSEXIT").

Andy?

                Linus

^ permalink raw reply	[flat|nested] 539+ messages in thread

* [GIT PULL] x86 fixes
@ 2018-06-30  8:49 Ingo Molnar
  2018-06-30 19:01 ` Linus Torvalds
  0 siblings, 1 reply; 539+ messages in thread
From: Ingo Molnar @ 2018-06-30  8:49 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Thomas Gleixner, Peter Zijlstra, Andrew Morton

Linus,

Please pull the latest x86-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus

   # HEAD: d79d0d8ad0cb3d782b41631dfeac8eb05e414bcd x86/mm: Clean up the printk()s in show_fault_oops()

The biggest diffstat comes from self-test updates, plus there's entry code fixes, 
5-level paging related fixes, console debug output fixes, plus misc fixes.

  out-of-topic modifications in x86-urgent-for-linus:
  -----------------------------------------------------
  tools/testing/selftests/x86/sigreturn.c# e8a445dea219: selftests/x86/sigreturn: Do 
                                   # ec3480205660: selftests/x86/sigreturn/64: 

 Thanks,

	Ingo

------------------>
Andrey Ryabinin (1):
      x86/mm: Don't free P4D table when it is folded at runtime

Andy Lutomirski (3):
      x86/entry/64/compat: Fix "x86/entry/64/compat: Preserve r8-r11 in int $0x80"
      selftests/x86/sigreturn/64: Fix spurious failures on AMD CPUs
      selftests/x86/sigreturn: Do minor cleanups

Dmitry Vyukov (2):
      x86/mm: Get rid of KERN_CONT in show_fault_oops()
      x86/mm: Clean up the printk()s in show_fault_oops()

Jan Beulich (1):
      x86/entry/32: Add explicit 'l' instruction suffix

Kirill A. Shutemov (2):
      x86/efi: Fix efi_call_phys_epilog() with CONFIG_X86_5LEVEL=y
      x86/mm: Drop unneeded __always_inline for p4d page table helpers


 arch/x86/entry/entry_32.S               |  2 +-
 arch/x86/entry/entry_64_compat.S        | 16 ++++-----
 arch/x86/include/asm/pgalloc.h          |  3 ++
 arch/x86/include/asm/pgtable.h          |  2 +-
 arch/x86/include/asm/pgtable_64.h       |  4 +--
 arch/x86/mm/fault.c                     | 21 ++++--------
 arch/x86/platform/efi/efi_64.c          |  4 +--
 tools/testing/selftests/x86/sigreturn.c | 59 ++++++++++++++++++++-------------
 8 files changed, 60 insertions(+), 51 deletions(-)

diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index 2582881d19ce..c371bfee137a 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -477,7 +477,7 @@ ENTRY(entry_SYSENTER_32)
 	 * whereas POPF does not.)
 	 */
 	addl	$PT_EFLAGS-PT_DS, %esp	/* point esp at pt_regs->flags */
-	btr	$X86_EFLAGS_IF_BIT, (%esp)
+	btrl	$X86_EFLAGS_IF_BIT, (%esp)
 	popfl
 
 	/*
diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
index 9de7f1e1dede..7d0df78db727 100644
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -84,13 +84,13 @@ ENTRY(entry_SYSENTER_compat)
 	pushq	%rdx			/* pt_regs->dx */
 	pushq	%rcx			/* pt_regs->cx */
 	pushq	$-ENOSYS		/* pt_regs->ax */
-	pushq   %r8			/* pt_regs->r8 */
+	pushq   $0			/* pt_regs->r8  = 0 */
 	xorl	%r8d, %r8d		/* nospec   r8 */
-	pushq   %r9			/* pt_regs->r9 */
+	pushq   $0			/* pt_regs->r9  = 0 */
 	xorl	%r9d, %r9d		/* nospec   r9 */
-	pushq   %r10			/* pt_regs->r10 */
+	pushq   $0			/* pt_regs->r10 = 0 */
 	xorl	%r10d, %r10d		/* nospec   r10 */
-	pushq   %r11			/* pt_regs->r11 */
+	pushq   $0			/* pt_regs->r11 = 0 */
 	xorl	%r11d, %r11d		/* nospec   r11 */
 	pushq   %rbx                    /* pt_regs->rbx */
 	xorl	%ebx, %ebx		/* nospec   rbx */
@@ -374,13 +374,13 @@ ENTRY(entry_INT80_compat)
 	pushq	%rcx			/* pt_regs->cx */
 	xorl	%ecx, %ecx		/* nospec   cx */
 	pushq	$-ENOSYS		/* pt_regs->ax */
-	pushq   $0			/* pt_regs->r8  = 0 */
+	pushq   %r8			/* pt_regs->r8 */
 	xorl	%r8d, %r8d		/* nospec   r8 */
-	pushq   $0			/* pt_regs->r9  = 0 */
+	pushq   %r9			/* pt_regs->r9 */
 	xorl	%r9d, %r9d		/* nospec   r9 */
-	pushq   $0			/* pt_regs->r10 = 0 */
+	pushq   %r10			/* pt_regs->r10*/
 	xorl	%r10d, %r10d		/* nospec   r10 */
-	pushq   $0			/* pt_regs->r11 = 0 */
+	pushq   %r11			/* pt_regs->r11 */
 	xorl	%r11d, %r11d		/* nospec   r11 */
 	pushq   %rbx                    /* pt_regs->rbx */
 	xorl	%ebx, %ebx		/* nospec   rbx */
diff --git a/arch/x86/include/asm/pgalloc.h b/arch/x86/include/asm/pgalloc.h
index ada6410fd2ec..fbd578daa66e 100644
--- a/arch/x86/include/asm/pgalloc.h
+++ b/arch/x86/include/asm/pgalloc.h
@@ -184,6 +184,9 @@ static inline p4d_t *p4d_alloc_one(struct mm_struct *mm, unsigned long addr)
 
 static inline void p4d_free(struct mm_struct *mm, p4d_t *p4d)
 {
+	if (!pgtable_l5_enabled())
+		return;
+
 	BUG_ON((unsigned long)p4d & (PAGE_SIZE-1));
 	free_page((unsigned long)p4d);
 }
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 99ecde23c3ec..5715647fc4fe 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -898,7 +898,7 @@ static inline unsigned long pgd_page_vaddr(pgd_t pgd)
 #define pgd_page(pgd)	pfn_to_page(pgd_pfn(pgd))
 
 /* to find an entry in a page-table-directory. */
-static __always_inline p4d_t *p4d_offset(pgd_t *pgd, unsigned long address)
+static inline p4d_t *p4d_offset(pgd_t *pgd, unsigned long address)
 {
 	if (!pgtable_l5_enabled())
 		return (p4d_t *)pgd;
diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
index 0fdcd21dadbd..3c5385f9a88f 100644
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -216,7 +216,7 @@ static inline pgd_t pti_set_user_pgd(pgd_t *pgdp, pgd_t pgd)
 }
 #endif
 
-static __always_inline void native_set_p4d(p4d_t *p4dp, p4d_t p4d)
+static inline void native_set_p4d(p4d_t *p4dp, p4d_t p4d)
 {
 	pgd_t pgd;
 
@@ -230,7 +230,7 @@ static __always_inline void native_set_p4d(p4d_t *p4dp, p4d_t p4d)
 	*p4dp = native_make_p4d(native_pgd_val(pgd));
 }
 
-static __always_inline void native_p4d_clear(p4d_t *p4d)
+static inline void native_p4d_clear(p4d_t *p4d)
 {
 	native_set_p4d(p4d, native_make_p4d(0));
 }
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 9a84a0d08727..2aafa6ab6103 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -641,11 +641,6 @@ static int is_f00f_bug(struct pt_regs *regs, unsigned long address)
 	return 0;
 }
 
-static const char nx_warning[] = KERN_CRIT
-"kernel tried to execute NX-protected page - exploit attempt? (uid: %d)\n";
-static const char smep_warning[] = KERN_CRIT
-"unable to execute userspace code (SMEP?) (uid: %d)\n";
-
 static void
 show_fault_oops(struct pt_regs *regs, unsigned long error_code,
 		unsigned long address)
@@ -664,20 +659,18 @@ show_fault_oops(struct pt_regs *regs, unsigned long error_code,
 		pte = lookup_address_in_pgd(pgd, address, &level);
 
 		if (pte && pte_present(*pte) && !pte_exec(*pte))
-			printk(nx_warning, from_kuid(&init_user_ns, current_uid()));
+			pr_crit("kernel tried to execute NX-protected page - exploit attempt? (uid: %d)\n",
+				from_kuid(&init_user_ns, current_uid()));
 		if (pte && pte_present(*pte) && pte_exec(*pte) &&
 				(pgd_flags(*pgd) & _PAGE_USER) &&
 				(__read_cr4() & X86_CR4_SMEP))
-			printk(smep_warning, from_kuid(&init_user_ns, current_uid()));
+			pr_crit("unable to execute userspace code (SMEP?) (uid: %d)\n",
+				from_kuid(&init_user_ns, current_uid()));
 	}
 
-	printk(KERN_ALERT "BUG: unable to handle kernel ");
-	if (address < PAGE_SIZE)
-		printk(KERN_CONT "NULL pointer dereference");
-	else
-		printk(KERN_CONT "paging request");
-
-	printk(KERN_CONT " at %px\n", (void *) address);
+	pr_alert("BUG: unable to handle kernel %s at %px\n",
+		 address < PAGE_SIZE ? "NULL pointer dereference" : "paging request",
+		 (void *)address);
 
 	dump_pagetable(address);
 }
diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c
index e01f7ceb9e7a..77873ce700ae 100644
--- a/arch/x86/platform/efi/efi_64.c
+++ b/arch/x86/platform/efi/efi_64.c
@@ -166,14 +166,14 @@ void __init efi_call_phys_epilog(pgd_t *save_pgd)
 		pgd = pgd_offset_k(pgd_idx * PGDIR_SIZE);
 		set_pgd(pgd_offset_k(pgd_idx * PGDIR_SIZE), save_pgd[pgd_idx]);
 
-		if (!(pgd_val(*pgd) & _PAGE_PRESENT))
+		if (!pgd_present(*pgd))
 			continue;
 
 		for (i = 0; i < PTRS_PER_P4D; i++) {
 			p4d = p4d_offset(pgd,
 					 pgd_idx * PGDIR_SIZE + i * P4D_SIZE);
 
-			if (!(p4d_val(*p4d) & _PAGE_PRESENT))
+			if (!p4d_present(*p4d))
 				continue;
 
 			pud = (pud_t *)p4d_page_vaddr(*p4d);
diff --git a/tools/testing/selftests/x86/sigreturn.c b/tools/testing/selftests/x86/sigreturn.c
index 246145b84a12..4d9dc3f2fd70 100644
--- a/tools/testing/selftests/x86/sigreturn.c
+++ b/tools/testing/selftests/x86/sigreturn.c
@@ -610,21 +610,41 @@ static int test_valid_sigreturn(int cs_bits, bool use_16bit_ss, int force_ss)
 	 */
 	for (int i = 0; i < NGREG; i++) {
 		greg_t req = requested_regs[i], res = resulting_regs[i];
+
 		if (i == REG_TRAPNO || i == REG_IP)
 			continue;	/* don't care */
-		if (i == REG_SP) {
-			printf("\tSP: %llx -> %llx\n", (unsigned long long)req,
-			       (unsigned long long)res);
 
+		if (i == REG_SP) {
 			/*
-			 * In many circumstances, the high 32 bits of rsp
-			 * are zeroed.  For example, we could be a real
-			 * 32-bit program, or we could hit any of a number
-			 * of poorly-documented IRET or segmented ESP
-			 * oddities.  If this happens, it's okay.
+			 * If we were using a 16-bit stack segment, then
+			 * the kernel is a bit stuck: IRET only restores
+			 * the low 16 bits of ESP/RSP if SS is 16-bit.
+			 * The kernel uses a hack to restore bits 31:16,
+			 * but that hack doesn't help with bits 63:32.
+			 * On Intel CPUs, bits 63:32 end up zeroed, and, on
+			 * AMD CPUs, they leak the high bits of the kernel
+			 * espfix64 stack pointer.  There's very little that
+			 * the kernel can do about it.
+			 *
+			 * Similarly, if we are returning to a 32-bit context,
+			 * the CPU will often lose the high 32 bits of RSP.
 			 */
-			if (res == (req & 0xFFFFFFFF))
-				continue;  /* OK; not expected to work */
+
+			if (res == req)
+				continue;
+
+			if (cs_bits != 64 && ((res ^ req) & 0xFFFFFFFF) == 0) {
+				printf("[NOTE]\tSP: %llx -> %llx\n",
+				       (unsigned long long)req,
+				       (unsigned long long)res);
+				continue;
+			}
+
+			printf("[FAIL]\tSP mismatch: requested 0x%llx; got 0x%llx\n",
+			       (unsigned long long)requested_regs[i],
+			       (unsigned long long)resulting_regs[i]);
+			nerrs++;
+			continue;
 		}
 
 		bool ignore_reg = false;
@@ -654,25 +674,18 @@ static int test_valid_sigreturn(int cs_bits, bool use_16bit_ss, int force_ss)
 #endif
 
 		/* Sanity check on the kernel */
-		if (i == REG_CX && requested_regs[i] != resulting_regs[i]) {
+		if (i == REG_CX && req != res) {
 			printf("[FAIL]\tCX (saved SP) mismatch: requested 0x%llx; got 0x%llx\n",
-			       (unsigned long long)requested_regs[i],
-			       (unsigned long long)resulting_regs[i]);
+			       (unsigned long long)req,
+			       (unsigned long long)res);
 			nerrs++;
 			continue;
 		}
 
-		if (requested_regs[i] != resulting_regs[i] && !ignore_reg) {
-			/*
-			 * SP is particularly interesting here.  The
-			 * usual cause of failures is that we hit the
-			 * nasty IRET case of returning to a 16-bit SS,
-			 * in which case bits 16:31 of the *kernel*
-			 * stack pointer persist in ESP.
-			 */
+		if (req != res && !ignore_reg) {
 			printf("[FAIL]\tReg %d mismatch: requested 0x%llx; got 0x%llx\n",
-			       i, (unsigned long long)requested_regs[i],
-			       (unsigned long long)resulting_regs[i]);
+			       i, (unsigned long long)req,
+			       (unsigned long long)res);
 			nerrs++;
 		}
 	}

^ permalink raw reply	[flat|nested] 539+ messages in thread

* [GIT PULL] x86 fixes
@ 2018-03-31 10:36 Ingo Molnar
  0 siblings, 0 replies; 539+ messages in thread
From: Ingo Molnar @ 2018-03-31 10:36 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Thomas Gleixner, Peter Zijlstra, Andrew Morton

Linus,

Please pull the latest x86-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus

   # HEAD: bd47a85acd727e27b7283daff557865ad04c59f6 x86/platform/UV: Fix critical UV MMR address error

Two UV platform fixes, and a kbuild fix.

 Thanks,

	Ingo

------------------>
Andrew Banman (1):
      x86/platform/uv/BAU: Add APIC idt entry

Sven Wegener (1):
      x86/purgatory: Avoid creating stray .<pid>.d files, remove -MD from KBUILD_CFLAGS

mike.travis@hpe.com (1):
      x86/platform/UV: Fix critical UV MMR address error


 arch/x86/include/asm/hw_irq.h     | 1 +
 arch/x86/include/asm/uv/uv_mmrs.h | 2 +-
 arch/x86/kernel/idt.c             | 3 +++
 arch/x86/platform/uv/tlb_uv.c     | 2 --
 arch/x86/purgatory/Makefile       | 2 +-
 5 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
index 2851077b6051..32e666e1231e 100644
--- a/arch/x86/include/asm/hw_irq.h
+++ b/arch/x86/include/asm/hw_irq.h
@@ -36,6 +36,7 @@ extern asmlinkage void kvm_posted_intr_wakeup_ipi(void);
 extern asmlinkage void kvm_posted_intr_nested_ipi(void);
 extern asmlinkage void error_interrupt(void);
 extern asmlinkage void irq_work_interrupt(void);
+extern asmlinkage void uv_bau_message_intr1(void);
 
 extern asmlinkage void spurious_interrupt(void);
 extern asmlinkage void thermal_interrupt(void);
diff --git a/arch/x86/include/asm/uv/uv_mmrs.h b/arch/x86/include/asm/uv/uv_mmrs.h
index ecb9ddef128f..62c79e26a59a 100644
--- a/arch/x86/include/asm/uv/uv_mmrs.h
+++ b/arch/x86/include/asm/uv/uv_mmrs.h
@@ -3833,7 +3833,7 @@ union uvh_rh_gam_mmioh_overlay_config0_mmr_u {
 #define UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR uv_undefined("UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR")
 #define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR uv_undefined("UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR")
 #define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR 0x1603000UL
-#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR 0x483000UL
+#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR 0x484000UL
 #define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR (				\
 	is_uv1_hub() ? UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR :		\
 	is_uv2_hub() ? UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR :		\
diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c
index 56d99be3706a..5d039c848bd9 100644
--- a/arch/x86/kernel/idt.c
+++ b/arch/x86/kernel/idt.c
@@ -140,6 +140,9 @@ static const __initconst struct idt_data apic_idts[] = {
 # ifdef CONFIG_IRQ_WORK
 	INTG(IRQ_WORK_VECTOR,		irq_work_interrupt),
 # endif
+#ifdef CONFIG_X86_UV
+	INTG(UV_BAU_MESSAGE,		uv_bau_message_intr1),
+#endif
 	INTG(SPURIOUS_APIC_VECTOR,	spurious_interrupt),
 	INTG(ERROR_APIC_VECTOR,		error_interrupt),
 #endif
diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c
index db77e087adaf..b36caae0fb2f 100644
--- a/arch/x86/platform/uv/tlb_uv.c
+++ b/arch/x86/platform/uv/tlb_uv.c
@@ -2255,8 +2255,6 @@ static int __init uv_bau_init(void)
 			init_uvhub(uvhub, vector, uv_base_pnode);
 	}
 
-	alloc_intr_gate(vector, uv_bau_message_intr1);
-
 	for_each_possible_blade(uvhub) {
 		if (uv_blade_nr_possible_cpus(uvhub)) {
 			unsigned long val;
diff --git a/arch/x86/purgatory/Makefile b/arch/x86/purgatory/Makefile
index 2f15a2ac4209..d70c15de417b 100644
--- a/arch/x86/purgatory/Makefile
+++ b/arch/x86/purgatory/Makefile
@@ -16,7 +16,7 @@ KCOV_INSTRUMENT := n
 # in turn leaves some undefined symbols like __fentry__ in purgatory and not
 # sure how to relocate those. Like kexec-tools, use custom flags.
 
-KBUILD_CFLAGS := -fno-strict-aliasing -Wall -Wstrict-prototypes -fno-zero-initialized-in-bss -fno-builtin -ffreestanding -c -MD -Os -mcmodel=large
+KBUILD_CFLAGS := -fno-strict-aliasing -Wall -Wstrict-prototypes -fno-zero-initialized-in-bss -fno-builtin -ffreestanding -c -Os -mcmodel=large
 KBUILD_CFLAGS += -m$(BITS)
 KBUILD_CFLAGS += $(call cc-option,-fno-PIE)
 

^ permalink raw reply	[flat|nested] 539+ messages in thread

* [GIT PULL] x86 fixes
@ 2018-02-15  0:45 Ingo Molnar
  0 siblings, 0 replies; 539+ messages in thread
From: Ingo Molnar @ 2018-02-15  0:45 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Thomas Gleixner, H. Peter Anvin, Peter Zijlstra,
	Andrew Morton

Linus,

Please pull the latest x86-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus

   # HEAD: fd0e786d9d09024f67bd71ec094b110237dc3840 x86/mm, mm/hwpoison: Don't unconditionally unmap kernel 1:1 pages

Misc fixes all across the map:

 - /proc/kcore vsyscall related fixes
 - LTO fix
 - build warning fix
 - CPU hotplug fix
 - Kconfig NR_CPUS cleanups
 - cpu_has() cleanups/robustification
 - .gitignore fix
 - memory-failure unmapping fix
 - UV platform fix

  out-of-topic modifications in x86-urgent-for-linus:
  -----------------------------------------------------
  fs/proc/kcore.c                    # 595dd46ebfc1: vfs/proc/kcore, x86/mm/kcore
  include/linux/kcore.h              # 595dd46ebfc1: vfs/proc/kcore, x86/mm/kcore
  include/linux/mm_inline.h          # fd0e786d9d09: x86/mm, mm/hwpoison: Don't u
  mm/memory-failure.c                # fd0e786d9d09: x86/mm, mm/hwpoison: Don't u

 Thanks,

	Ingo

------------------>
Arnd Bergmann (1):
      x86/error_inject: Make just_return_func() globally visible

Borislav Petkov (1):
      x86/MCE: Fix build warning introduced by "x86: do not use print_symbol()"

Ingo Molnar (1):
      x86/Kconfig: Further simplify the NR_CPUS config

Jia Zhang (2):
      vfs/proc/kcore, x86/mm/kcore: Fix SMAP fault when dumping vsyscall user page
      x86/mm/kcore: Add vsyscall page to /proc/kcore conditionally

Masayoshi Mizuma (1):
      x86/smpboot: Fix uncore_pci_remove() indexing bug when hot-removing a physical CPU

Peter Zijlstra (2):
      x86/cpufeature: Reindent _static_cpu_has()
      x86/cpufeature: Update _static_cpu_has() to use all named variables

Progyan Bhattacharya (1):
      x86/build: Add arch/x86/tools/insn_decoder_test to .gitignore

Randy Dunlap (1):
      x86/Kconfig: Simplify NR_CPUS config

Tony Luck (1):
      x86/mm, mm/hwpoison: Don't unconditionally unmap kernel 1:1 pages

mike.travis@hpe.com (1):
      x86/platform/UV: Fix GAM Range Table entries less than 1GB


 arch/x86/.gitignore                       |  1 +
 arch/x86/Kconfig                          | 75 ++++++++++++++++++++++-------
 arch/x86/include/asm/cpufeature.h         | 79 ++++++++++++++++---------------
 arch/x86/include/asm/page_64.h            |  4 --
 arch/x86/kernel/apic/x2apic_uv_x.c        | 15 ++++--
 arch/x86/kernel/cpu/mcheck/mce-internal.h | 15 ++++++
 arch/x86/kernel/cpu/mcheck/mce.c          | 19 +++++---
 arch/x86/kernel/smpboot.c                 |  1 -
 arch/x86/lib/error-inject.c               |  1 +
 arch/x86/mm/init_64.c                     |  4 +-
 fs/proc/kcore.c                           |  4 ++
 include/linux/kcore.h                     |  1 +
 include/linux/mm_inline.h                 |  6 ---
 mm/memory-failure.c                       |  2 -
 14 files changed, 146 insertions(+), 81 deletions(-)

diff --git a/arch/x86/.gitignore b/arch/x86/.gitignore
index aff152c87cf4..5a82bac5e0bc 100644
--- a/arch/x86/.gitignore
+++ b/arch/x86/.gitignore
@@ -1,6 +1,7 @@
 boot/compressed/vmlinux
 tools/test_get_len
 tools/insn_sanity
+tools/insn_decoder_test
 purgatory/kexec-purgatory.c
 purgatory/purgatory.ro
 
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 63bf349b2b24..a528c14d45a5 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -423,12 +423,6 @@ config X86_MPPARSE
 	  For old smp systems that do not have proper acpi support. Newer systems
 	  (esp with 64bit cpus) with acpi support, MADT and DSDT will override it
 
-config X86_BIGSMP
-	bool "Support for big SMP systems with more than 8 CPUs"
-	depends on X86_32 && SMP
-	---help---
-	  This option is needed for the systems that have more than 8 CPUs
-
 config GOLDFISH
        def_bool y
        depends on X86_GOLDFISH
@@ -460,6 +454,12 @@ config INTEL_RDT
 	  Say N if unsure.
 
 if X86_32
+config X86_BIGSMP
+	bool "Support for big SMP systems with more than 8 CPUs"
+	depends on SMP
+	---help---
+	  This option is needed for the systems that have more than 8 CPUs
+
 config X86_EXTENDED_PLATFORM
 	bool "Support for extended (non-PC) x86 platforms"
 	default y
@@ -949,25 +949,66 @@ config MAXSMP
 	  Enable maximum number of CPUS and NUMA Nodes for this architecture.
 	  If unsure, say N.
 
+#
+# The maximum number of CPUs supported:
+#
+# The main config value is NR_CPUS, which defaults to NR_CPUS_DEFAULT,
+# and which can be configured interactively in the
+# [NR_CPUS_RANGE_BEGIN ... NR_CPUS_RANGE_END] range.
+#
+# The ranges are different on 32-bit and 64-bit kernels, depending on
+# hardware capabilities and scalability features of the kernel.
+#
+# ( If MAXSMP is enabled we just use the highest possible value and disable
+#   interactive configuration. )
+#
+
+config NR_CPUS_RANGE_BEGIN
+	int
+	default NR_CPUS_RANGE_END if MAXSMP
+	default    1 if !SMP
+	default    2
+
+config NR_CPUS_RANGE_END
+	int
+	depends on X86_32
+	default   64 if  SMP &&  X86_BIGSMP
+	default    8 if  SMP && !X86_BIGSMP
+	default    1 if !SMP
+
+config NR_CPUS_RANGE_END
+	int
+	depends on X86_64
+	default 8192 if  SMP && ( MAXSMP ||  CPUMASK_OFFSTACK)
+	default  512 if  SMP && (!MAXSMP && !CPUMASK_OFFSTACK)
+	default    1 if !SMP
+
+config NR_CPUS_DEFAULT
+	int
+	depends on X86_32
+	default   32 if  X86_BIGSMP
+	default    8 if  SMP
+	default    1 if !SMP
+
+config NR_CPUS_DEFAULT
+	int
+	depends on X86_64
+	default 8192 if  MAXSMP
+	default   64 if  SMP
+	default    1 if !SMP
+
 config NR_CPUS
 	int "Maximum number of CPUs" if SMP && !MAXSMP
-	range 2 8 if SMP && X86_32 && !X86_BIGSMP
-	range 2 64 if SMP && X86_32 && X86_BIGSMP
-	range 2 512 if SMP && !MAXSMP && !CPUMASK_OFFSTACK && X86_64
-	range 2 8192 if SMP && !MAXSMP && CPUMASK_OFFSTACK && X86_64
-	default "1" if !SMP
-	default "8192" if MAXSMP
-	default "32" if SMP && X86_BIGSMP
-	default "8" if SMP && X86_32
-	default "64" if SMP
+	range NR_CPUS_RANGE_BEGIN NR_CPUS_RANGE_END
+	default NR_CPUS_DEFAULT
 	---help---
 	  This allows you to specify the maximum number of CPUs which this
 	  kernel will support.  If CPUMASK_OFFSTACK is enabled, the maximum
 	  supported value is 8192, otherwise the maximum value is 512.  The
 	  minimum value which makes sense is 2.
 
-	  This is purely to save memory - each supported CPU adds
-	  approximately eight kilobytes to the kernel image.
+	  This is purely to save memory: each supported CPU adds about 8KB
+	  to the kernel image.
 
 config SCHED_SMT
 	bool "SMT (Hyperthreading) scheduler support"
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 70eddb3922ff..736771c9822e 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -148,45 +148,46 @@ extern void clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int bit);
  */
 static __always_inline __pure bool _static_cpu_has(u16 bit)
 {
-		asm_volatile_goto("1: jmp 6f\n"
-			 "2:\n"
-			 ".skip -(((5f-4f) - (2b-1b)) > 0) * "
-			         "((5f-4f) - (2b-1b)),0x90\n"
-			 "3:\n"
-			 ".section .altinstructions,\"a\"\n"
-			 " .long 1b - .\n"		/* src offset */
-			 " .long 4f - .\n"		/* repl offset */
-			 " .word %P1\n"			/* always replace */
-			 " .byte 3b - 1b\n"		/* src len */
-			 " .byte 5f - 4f\n"		/* repl len */
-			 " .byte 3b - 2b\n"		/* pad len */
-			 ".previous\n"
-			 ".section .altinstr_replacement,\"ax\"\n"
-			 "4: jmp %l[t_no]\n"
-			 "5:\n"
-			 ".previous\n"
-			 ".section .altinstructions,\"a\"\n"
-			 " .long 1b - .\n"		/* src offset */
-			 " .long 0\n"			/* no replacement */
-			 " .word %P0\n"			/* feature bit */
-			 " .byte 3b - 1b\n"		/* src len */
-			 " .byte 0\n"			/* repl len */
-			 " .byte 0\n"			/* pad len */
-			 ".previous\n"
-			 ".section .altinstr_aux,\"ax\"\n"
-			 "6:\n"
-			 " testb %[bitnum],%[cap_byte]\n"
-			 " jnz %l[t_yes]\n"
-			 " jmp %l[t_no]\n"
-			 ".previous\n"
-			 : : "i" (bit), "i" (X86_FEATURE_ALWAYS),
-			     [bitnum] "i" (1 << (bit & 7)),
-			     [cap_byte] "m" (((const char *)boot_cpu_data.x86_capability)[bit >> 3])
-			 : : t_yes, t_no);
-	t_yes:
-		return true;
-	t_no:
-		return false;
+	asm_volatile_goto("1: jmp 6f\n"
+		 "2:\n"
+		 ".skip -(((5f-4f) - (2b-1b)) > 0) * "
+			 "((5f-4f) - (2b-1b)),0x90\n"
+		 "3:\n"
+		 ".section .altinstructions,\"a\"\n"
+		 " .long 1b - .\n"		/* src offset */
+		 " .long 4f - .\n"		/* repl offset */
+		 " .word %P[always]\n"		/* always replace */
+		 " .byte 3b - 1b\n"		/* src len */
+		 " .byte 5f - 4f\n"		/* repl len */
+		 " .byte 3b - 2b\n"		/* pad len */
+		 ".previous\n"
+		 ".section .altinstr_replacement,\"ax\"\n"
+		 "4: jmp %l[t_no]\n"
+		 "5:\n"
+		 ".previous\n"
+		 ".section .altinstructions,\"a\"\n"
+		 " .long 1b - .\n"		/* src offset */
+		 " .long 0\n"			/* no replacement */
+		 " .word %P[feature]\n"		/* feature bit */
+		 " .byte 3b - 1b\n"		/* src len */
+		 " .byte 0\n"			/* repl len */
+		 " .byte 0\n"			/* pad len */
+		 ".previous\n"
+		 ".section .altinstr_aux,\"ax\"\n"
+		 "6:\n"
+		 " testb %[bitnum],%[cap_byte]\n"
+		 " jnz %l[t_yes]\n"
+		 " jmp %l[t_no]\n"
+		 ".previous\n"
+		 : : [feature]  "i" (bit),
+		     [always]   "i" (X86_FEATURE_ALWAYS),
+		     [bitnum]   "i" (1 << (bit & 7)),
+		     [cap_byte] "m" (((const char *)boot_cpu_data.x86_capability)[bit >> 3])
+		 : : t_yes, t_no);
+t_yes:
+	return true;
+t_no:
+	return false;
 }
 
 #define static_cpu_has(bit)					\
diff --git a/arch/x86/include/asm/page_64.h b/arch/x86/include/asm/page_64.h
index 4baa6bceb232..d652a3808065 100644
--- a/arch/x86/include/asm/page_64.h
+++ b/arch/x86/include/asm/page_64.h
@@ -52,10 +52,6 @@ static inline void clear_page(void *page)
 
 void copy_page(void *to, void *from);
 
-#ifdef CONFIG_X86_MCE
-#define arch_unmap_kpfn arch_unmap_kpfn
-#endif
-
 #endif	/* !__ASSEMBLY__ */
 
 #ifdef CONFIG_X86_VSYSCALL_EMULATION
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index 46b675aaf20b..f11910b44638 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -1176,16 +1176,25 @@ static void __init decode_gam_rng_tbl(unsigned long ptr)
 
 	uv_gre_table = gre;
 	for (; gre->type != UV_GAM_RANGE_TYPE_UNUSED; gre++) {
+		unsigned long size = ((unsigned long)(gre->limit - lgre)
+					<< UV_GAM_RANGE_SHFT);
+		int order = 0;
+		char suffix[] = " KMGTPE";
+
+		while (size > 9999 && order < sizeof(suffix)) {
+			size /= 1024;
+			order++;
+		}
+
 		if (!index) {
 			pr_info("UV: GAM Range Table...\n");
 			pr_info("UV:  # %20s %14s %5s %4s %5s %3s %2s\n", "Range", "", "Size", "Type", "NASID", "SID", "PN");
 		}
-		pr_info("UV: %2d: 0x%014lx-0x%014lx %5luG %3d   %04x  %02x %02x\n",
+		pr_info("UV: %2d: 0x%014lx-0x%014lx %5lu%c %3d   %04x  %02x %02x\n",
 			index++,
 			(unsigned long)lgre << UV_GAM_RANGE_SHFT,
 			(unsigned long)gre->limit << UV_GAM_RANGE_SHFT,
-			((unsigned long)(gre->limit - lgre)) >>
-				(30 - UV_GAM_RANGE_SHFT), /* 64M -> 1G */
+			size, suffix[order],
 			gre->type, gre->nasid, gre->sockid, gre->pnode);
 
 		lgre = gre->limit;
diff --git a/arch/x86/kernel/cpu/mcheck/mce-internal.h b/arch/x86/kernel/cpu/mcheck/mce-internal.h
index aa0d5df9dc60..e956eb267061 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-internal.h
+++ b/arch/x86/kernel/cpu/mcheck/mce-internal.h
@@ -115,4 +115,19 @@ static inline void mce_unregister_injector_chain(struct notifier_block *nb)	{ }
 
 extern struct mca_config mca_cfg;
 
+#ifndef CONFIG_X86_64
+/*
+ * On 32-bit systems it would be difficult to safely unmap a poison page
+ * from the kernel 1:1 map because there are no non-canonical addresses that
+ * we can use to refer to the address without risking a speculative access.
+ * However, this isn't much of an issue because:
+ * 1) Few unmappable pages are in the 1:1 map. Most are in HIGHMEM which
+ *    are only mapped into the kernel as needed
+ * 2) Few people would run a 32-bit kernel on a machine that supports
+ *    recoverable errors because they have too much memory to boot 32-bit.
+ */
+static inline void mce_unmap_kpfn(unsigned long pfn) {}
+#define mce_unmap_kpfn mce_unmap_kpfn
+#endif
+
 #endif /* __X86_MCE_INTERNAL_H__ */
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 3a8e88a611eb..8ff94d1e2dce 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -105,6 +105,10 @@ static struct irq_work mce_irq_work;
 
 static void (*quirk_no_way_out)(int bank, struct mce *m, struct pt_regs *regs);
 
+#ifndef mce_unmap_kpfn
+static void mce_unmap_kpfn(unsigned long pfn);
+#endif
+
 /*
  * CPU/chipset specific EDAC code can register a notifier call here to print
  * MCE errors in a human-readable form.
@@ -234,7 +238,7 @@ static void __print_mce(struct mce *m)
 			m->cs, m->ip);
 
 		if (m->cs == __KERNEL_CS)
-			pr_cont("{%pS}", (void *)m->ip);
+			pr_cont("{%pS}", (void *)(unsigned long)m->ip);
 		pr_cont("\n");
 	}
 
@@ -590,7 +594,8 @@ static int srao_decode_notifier(struct notifier_block *nb, unsigned long val,
 
 	if (mce_usable_address(mce) && (mce->severity == MCE_AO_SEVERITY)) {
 		pfn = mce->addr >> PAGE_SHIFT;
-		memory_failure(pfn, 0);
+		if (!memory_failure(pfn, 0))
+			mce_unmap_kpfn(pfn);
 	}
 
 	return NOTIFY_OK;
@@ -1057,12 +1062,13 @@ static int do_memory_failure(struct mce *m)
 	ret = memory_failure(m->addr >> PAGE_SHIFT, flags);
 	if (ret)
 		pr_err("Memory error not recovered");
+	else
+		mce_unmap_kpfn(m->addr >> PAGE_SHIFT);
 	return ret;
 }
 
-#if defined(arch_unmap_kpfn) && defined(CONFIG_MEMORY_FAILURE)
-
-void arch_unmap_kpfn(unsigned long pfn)
+#ifndef mce_unmap_kpfn
+static void mce_unmap_kpfn(unsigned long pfn)
 {
 	unsigned long decoy_addr;
 
@@ -1073,7 +1079,7 @@ void arch_unmap_kpfn(unsigned long pfn)
 	 * We would like to just call:
 	 *	set_memory_np((unsigned long)pfn_to_kaddr(pfn), 1);
 	 * but doing that would radically increase the odds of a
-	 * speculative access to the posion page because we'd have
+	 * speculative access to the poison page because we'd have
 	 * the virtual address of the kernel 1:1 mapping sitting
 	 * around in registers.
 	 * Instead we get tricky.  We create a non-canonical address
@@ -1098,7 +1104,6 @@ void arch_unmap_kpfn(unsigned long pfn)
 
 	if (set_memory_np(decoy_addr, 1))
 		pr_warn("Could not invalidate pfn=0x%lx from 1:1 map\n", pfn);
-
 }
 #endif
 
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 6f27facbaa9b..cfc61e1d45e2 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -1430,7 +1430,6 @@ static void remove_siblinginfo(int cpu)
 	cpumask_clear(cpu_llc_shared_mask(cpu));
 	cpumask_clear(topology_sibling_cpumask(cpu));
 	cpumask_clear(topology_core_cpumask(cpu));
-	c->phys_proc_id = 0;
 	c->cpu_core_id = 0;
 	cpumask_clear_cpu(cpu, cpu_sibling_setup_mask);
 	recompute_smt_state();
diff --git a/arch/x86/lib/error-inject.c b/arch/x86/lib/error-inject.c
index 7b881d03d0dd..3cdf06128d13 100644
--- a/arch/x86/lib/error-inject.c
+++ b/arch/x86/lib/error-inject.c
@@ -7,6 +7,7 @@ asmlinkage void just_return_func(void);
 
 asm(
 	".type just_return_func, @function\n"
+	".globl just_return_func\n"
 	"just_return_func:\n"
 	"	ret\n"
 	".size just_return_func, .-just_return_func\n"
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 1ab42c852069..8ba9c3128947 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -1193,8 +1193,8 @@ void __init mem_init(void)
 	register_page_bootmem_info();
 
 	/* Register memory areas for /proc/kcore */
-	kclist_add(&kcore_vsyscall, (void *)VSYSCALL_ADDR,
-			 PAGE_SIZE, KCORE_OTHER);
+	if (get_gate_vma(&init_mm))
+		kclist_add(&kcore_vsyscall, (void *)VSYSCALL_ADDR, PAGE_SIZE, KCORE_USER);
 
 	mem_init_print_info(NULL);
 }
diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
index e8a93bc8285d..d1e82761de81 100644
--- a/fs/proc/kcore.c
+++ b/fs/proc/kcore.c
@@ -510,6 +510,10 @@ read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos)
 			/* we have to zero-fill user buffer even if no read */
 			if (copy_to_user(buffer, buf, tsz))
 				return -EFAULT;
+		} else if (m->type == KCORE_USER) {
+			/* User page is handled prior to normal kernel page: */
+			if (copy_to_user(buffer, (char *)start, tsz))
+				return -EFAULT;
 		} else {
 			if (kern_addr_valid(start)) {
 				/*
diff --git a/include/linux/kcore.h b/include/linux/kcore.h
index 7ff25a808fef..80db19d3a505 100644
--- a/include/linux/kcore.h
+++ b/include/linux/kcore.h
@@ -10,6 +10,7 @@ enum kcore_type {
 	KCORE_VMALLOC,
 	KCORE_RAM,
 	KCORE_VMEMMAP,
+	KCORE_USER,
 	KCORE_OTHER,
 };
 
diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h
index c30b32e3c862..10191c28fc04 100644
--- a/include/linux/mm_inline.h
+++ b/include/linux/mm_inline.h
@@ -127,10 +127,4 @@ static __always_inline enum lru_list page_lru(struct page *page)
 
 #define lru_to_page(head) (list_entry((head)->prev, struct page, lru))
 
-#ifdef arch_unmap_kpfn
-extern void arch_unmap_kpfn(unsigned long pfn);
-#else
-static __always_inline void arch_unmap_kpfn(unsigned long pfn) { }
-#endif
-
 #endif
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 4b80ccee4535..8291b75f42c8 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -1139,8 +1139,6 @@ int memory_failure(unsigned long pfn, int flags)
 		return 0;
 	}
 
-	arch_unmap_kpfn(pfn);
-
 	orig_head = hpage = compound_head(p);
 	num_poisoned_pages_inc();
 

^ permalink raw reply	[flat|nested] 539+ messages in thread

* Re: [GIT PULL] x86 fixes
  2018-01-18  0:24   ` Ingo Molnar
@ 2018-01-18  0:29     ` Andrew Morton
  0 siblings, 0 replies; 539+ messages in thread
From: Andrew Morton @ 2018-01-18  0:29 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Linus Torvalds, Linux Kernel Mailing List, Thomas Gleixner,
	H. Peter Anvin, Peter Zijlstra, Borislav Petkov

On Thu, 18 Jan 2018 01:24:08 +0100 Ingo Molnar <mingo@kernel.org> wrote:

> 
> * Linus Torvalds <torvalds@linux-foundation.org> wrote:
> 
> > On Wed, Jan 17, 2018 at 7:41 AM, Ingo Molnar <mingo@kernel.org> wrote:
> > >
> > >  - A kdump fix
> > >
> > >   out-of-topic modifications in x86-urgent-for-linus:
> > >   -----------------------------------------------------
> > >   include/linux/crash_core.h         # 9f15b9120f56: kdump: Write the correct add
> > >   kernel/crash_core.c                # 9f15b9120f56: kdump: Write the correct add
> > 
> > This came through Andrew too. It all merged fine since there were no
> > other modifications, but it's a bit odd how this was in the x86 tree,
> > and even if that part makes sense it's a sign of lack of communication
> > at some point.
> > 
> > Oh well. Not a big deal. I just thought I'd mention it.
> 
> Hm, I don't think Andrew Cc:-ed me when he sent it to you, so I didn't notice the 
> duplication. The bug was introduced via the 5-level paging changes in the x86 
> tree, so I assumed the fix would go via the x86 tree as well.
> 
> Andrew was Cc:-ed to the -tip commit.

I cc'ed the whole world on that one:

Acked-by: Baoquan He <bhe@redhat.com>
Acked-by: Dave Young <dyoung@redhat.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Dave Young <dyoung@redhat.com>
Cc: Baoquan He <bhe@redhat.com>
Cc: Vivek Goyal <vgoyal@redhat.com>
Cc: <stable@vger.kernel.org>

but yeah, I make a lot of noise ;)

Ordinarily I'll auto-drop such things if they turn up in linux-next but
it seems this one got fast-tracked in -tip so that mechanism didn't
work (this is rare).

^ permalink raw reply	[flat|nested] 539+ messages in thread

* Re: [GIT PULL] x86 fixes
  2018-01-17 20:35 ` Linus Torvalds
@ 2018-01-18  0:24   ` Ingo Molnar
  2018-01-18  0:29     ` Andrew Morton
  0 siblings, 1 reply; 539+ messages in thread
From: Ingo Molnar @ 2018-01-18  0:24 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: Linux Kernel Mailing List, Thomas Gleixner, H. Peter Anvin,
	Peter Zijlstra, Borislav Petkov, Andrew Morton


* Linus Torvalds <torvalds@linux-foundation.org> wrote:

> On Wed, Jan 17, 2018 at 7:41 AM, Ingo Molnar <mingo@kernel.org> wrote:
> >
> >  - A kdump fix
> >
> >   out-of-topic modifications in x86-urgent-for-linus:
> >   -----------------------------------------------------
> >   include/linux/crash_core.h         # 9f15b9120f56: kdump: Write the correct add
> >   kernel/crash_core.c                # 9f15b9120f56: kdump: Write the correct add
> 
> This came through Andrew too. It all merged fine since there were no
> other modifications, but it's a bit odd how this was in the x86 tree,
> and even if that part makes sense it's a sign of lack of communication
> at some point.
> 
> Oh well. Not a big deal. I just thought I'd mention it.

Hm, I don't think Andrew Cc:-ed me when he sent it to you, so I didn't notice the 
duplication. The bug was introduced via the 5-level paging changes in the x86 
tree, so I assumed the fix would go via the x86 tree as well.

Andrew was Cc:-ed to the -tip commit.

Thanks,

	Ingo

^ permalink raw reply	[flat|nested] 539+ messages in thread

* Re: [GIT PULL] x86 fixes
  2018-01-17 15:41 Ingo Molnar
@ 2018-01-17 20:35 ` Linus Torvalds
  2018-01-18  0:24   ` Ingo Molnar
  0 siblings, 1 reply; 539+ messages in thread
From: Linus Torvalds @ 2018-01-17 20:35 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Linux Kernel Mailing List, Thomas Gleixner, H. Peter Anvin,
	Peter Zijlstra, Borislav Petkov, Andrew Morton

On Wed, Jan 17, 2018 at 7:41 AM, Ingo Molnar <mingo@kernel.org> wrote:
>
>  - A kdump fix
>
>   out-of-topic modifications in x86-urgent-for-linus:
>   -----------------------------------------------------
>   include/linux/crash_core.h         # 9f15b9120f56: kdump: Write the correct add
>   kernel/crash_core.c                # 9f15b9120f56: kdump: Write the correct add

This came through Andrew too. It all merged fine since there were no
other modifications, but it's a bit odd how this was in the x86 tree,
and even if that part makes sense it's a sign of lack of communication
at some point.

Oh well. Not a big deal. I just thought I'd mention it.

               Linus

^ permalink raw reply	[flat|nested] 539+ messages in thread

* [GIT PULL] x86 fixes
@ 2018-01-17 15:41 Ingo Molnar
  2018-01-17 20:35 ` Linus Torvalds
  0 siblings, 1 reply; 539+ messages in thread
From: Ingo Molnar @ 2018-01-17 15:41 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Thomas Gleixner, H. Peter Anvin, Peter Zijlstra,
	Borislav Petkov, Andrew Morton

Linus,

Please pull the latest x86-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus

   # HEAD: 45d55e7bac4028af93f5fa324e69958a0b868e96 x86/apic/vector: Fix off by one in error path

Misc fixes:

 - A rather involved set of memory hardware encryption fixes to support the early 
   loading of microcode files via the initrd. These are larger than what we 
   normally take at such a late -rc stage, but there are two mitigating factors: 
   1) much of the changes are limited to the SME code itself 2) being able to 
   early load microcode has increased importance in the post-Meltdown/Spectre era.

 - An IRQ vector allocator fix

 - An Intel RDT driver use-after-free fix

 - An APIC driver bug fix/revert to make certain older systems boot again

 - A pkeys ABI fix

 - TSC calibration fixes

 - A kdump fix

  out-of-topic modifications in x86-urgent-for-linus:
  -----------------------------------------------------
  include/linux/crash_core.h         # 9f15b9120f56: kdump: Write the correct add
  kernel/crash_core.c                # 9f15b9120f56: kdump: Write the correct add

 Thanks,

	Ingo

------------------>
Andi Kleen (1):
      x86/idt: Mark IDT tables __initconst

Eric W. Biederman (1):
      x86/mm/pkeys: Fix fill_sig_info_pkey

Kirill A. Shutemov (1):
      kdump: Write the correct address of mem_section into vmcoreinfo

Len Brown (3):
      x86/tsc: Future-proof native_calibrate_tsc()
      x86/tsc: Fix erroneous TSC rate on Skylake Xeon
      x86/tsc: Print tsc_khz, when it differs from cpu_khz

Thomas Gleixner (2):
      x86/intel_rdt/cqm: Prevent use after free
      x86/apic/vector: Fix off by one in error path

Tom Lendacky (5):
      x86/mm: Clean up register saving in the __enc_copy() assembly code
      x86/mm: Use a struct to reduce parameters for SME PGD mapping
      x86/mm: Centralize PMD flags in sme_encrypt_kernel()
      x86/mm: Prepare sme_encrypt_kernel() for PAGE aligned encryption
      x86/mm: Encrypt the initrd earlier for BSP microcode update

Ville Syrjälä (1):
      Revert "x86/apic: Remove init_bsp_APIC()"


 arch/x86/include/asm/apic.h        |   1 +
 arch/x86/include/asm/mem_encrypt.h |   4 +-
 arch/x86/kernel/apic/apic.c        |  49 +++++
 arch/x86/kernel/apic/vector.c      |   7 +-
 arch/x86/kernel/cpu/intel_rdt.c    |   8 +-
 arch/x86/kernel/head64.c           |   4 +-
 arch/x86/kernel/idt.c              |  12 +-
 arch/x86/kernel/irqinit.c          |   3 +
 arch/x86/kernel/setup.c            |  10 --
 arch/x86/kernel/tsc.c              |   9 +-
 arch/x86/mm/fault.c                |   7 +-
 arch/x86/mm/mem_encrypt.c          | 356 +++++++++++++++++++++++++++----------
 arch/x86/mm/mem_encrypt_boot.S     |  80 +++++----
 include/linux/crash_core.h         |   2 +
 kernel/crash_core.c                |   2 +-
 15 files changed, 391 insertions(+), 163 deletions(-)

diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index a9e57f08bfa6..98722773391d 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -136,6 +136,7 @@ extern void disconnect_bsp_APIC(int virt_wire_setup);
 extern void disable_local_APIC(void);
 extern void lapic_shutdown(void);
 extern void sync_Arb_IDs(void);
+extern void init_bsp_APIC(void);
 extern void apic_intr_mode_init(void);
 extern void setup_local_APIC(void);
 extern void init_apic_mappings(void);
diff --git a/arch/x86/include/asm/mem_encrypt.h b/arch/x86/include/asm/mem_encrypt.h
index c9459a4c3c68..22c5f3e6f820 100644
--- a/arch/x86/include/asm/mem_encrypt.h
+++ b/arch/x86/include/asm/mem_encrypt.h
@@ -39,7 +39,7 @@ void __init sme_unmap_bootdata(char *real_mode_data);
 
 void __init sme_early_init(void);
 
-void __init sme_encrypt_kernel(void);
+void __init sme_encrypt_kernel(struct boot_params *bp);
 void __init sme_enable(struct boot_params *bp);
 
 int __init early_set_memory_decrypted(unsigned long vaddr, unsigned long size);
@@ -67,7 +67,7 @@ static inline void __init sme_unmap_bootdata(char *real_mode_data) { }
 
 static inline void __init sme_early_init(void) { }
 
-static inline void __init sme_encrypt_kernel(void) { }
+static inline void __init sme_encrypt_kernel(struct boot_params *bp) { }
 static inline void __init sme_enable(struct boot_params *bp) { }
 
 static inline bool sme_active(void) { return false; }
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 880441f24146..25ddf02598d2 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -1286,6 +1286,55 @@ static int __init apic_intr_mode_select(void)
 	return APIC_SYMMETRIC_IO;
 }
 
+/*
+ * An initial setup of the virtual wire mode.
+ */
+void __init init_bsp_APIC(void)
+{
+	unsigned int value;
+
+	/*
+	 * Don't do the setup now if we have a SMP BIOS as the
+	 * through-I/O-APIC virtual wire mode might be active.
+	 */
+	if (smp_found_config || !boot_cpu_has(X86_FEATURE_APIC))
+		return;
+
+	/*
+	 * Do not trust the local APIC being empty at bootup.
+	 */
+	clear_local_APIC();
+
+	/*
+	 * Enable APIC.
+	 */
+	value = apic_read(APIC_SPIV);
+	value &= ~APIC_VECTOR_MASK;
+	value |= APIC_SPIV_APIC_ENABLED;
+
+#ifdef CONFIG_X86_32
+	/* This bit is reserved on P4/Xeon and should be cleared */
+	if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) &&
+	    (boot_cpu_data.x86 == 15))
+		value &= ~APIC_SPIV_FOCUS_DISABLED;
+	else
+#endif
+		value |= APIC_SPIV_FOCUS_DISABLED;
+	value |= SPURIOUS_APIC_VECTOR;
+	apic_write(APIC_SPIV, value);
+
+	/*
+	 * Set up the virtual wire mode.
+	 */
+	apic_write(APIC_LVT0, APIC_DM_EXTINT);
+	value = APIC_DM_NMI;
+	if (!lapic_is_integrated())		/* 82489DX */
+		value |= APIC_LVT_LEVEL_TRIGGER;
+	if (apic_extnmi == APIC_EXTNMI_NONE)
+		value |= APIC_LVT_MASKED;
+	apic_write(APIC_LVT1, value);
+}
+
 /* Init the interrupt delivery mode for the BSP */
 void __init apic_intr_mode_init(void)
 {
diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index f8b03bb8e725..3cc471beb50b 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -542,14 +542,17 @@ static int x86_vector_alloc_irqs(struct irq_domain *domain, unsigned int virq,
 
 		err = assign_irq_vector_policy(irqd, info);
 		trace_vector_setup(virq + i, false, err);
-		if (err)
+		if (err) {
+			irqd->chip_data = NULL;
+			free_apic_chip_data(apicd);
 			goto error;
+		}
 	}
 
 	return 0;
 
 error:
-	x86_vector_free_irqs(domain, virq, i + 1);
+	x86_vector_free_irqs(domain, virq, i);
 	return err;
 }
 
diff --git a/arch/x86/kernel/cpu/intel_rdt.c b/arch/x86/kernel/cpu/intel_rdt.c
index 88dcf8479013..99442370de40 100644
--- a/arch/x86/kernel/cpu/intel_rdt.c
+++ b/arch/x86/kernel/cpu/intel_rdt.c
@@ -525,10 +525,6 @@ static void domain_remove_cpu(int cpu, struct rdt_resource *r)
 		 */
 		if (static_branch_unlikely(&rdt_mon_enable_key))
 			rmdir_mondata_subdir_allrdtgrp(r, d->id);
-		kfree(d->ctrl_val);
-		kfree(d->rmid_busy_llc);
-		kfree(d->mbm_total);
-		kfree(d->mbm_local);
 		list_del(&d->list);
 		if (is_mbm_enabled())
 			cancel_delayed_work(&d->mbm_over);
@@ -545,6 +541,10 @@ static void domain_remove_cpu(int cpu, struct rdt_resource *r)
 			cancel_delayed_work(&d->cqm_limbo);
 		}
 
+		kfree(d->ctrl_val);
+		kfree(d->rmid_busy_llc);
+		kfree(d->mbm_total);
+		kfree(d->mbm_local);
 		kfree(d);
 		return;
 	}
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 6a5d757b9cfd..7ba5d819ebe3 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -157,8 +157,8 @@ unsigned long __head __startup_64(unsigned long physaddr,
 	p = fixup_pointer(&phys_base, physaddr);
 	*p += load_delta - sme_get_me_mask();
 
-	/* Encrypt the kernel (if SME is active) */
-	sme_encrypt_kernel();
+	/* Encrypt the kernel and related (if SME is active) */
+	sme_encrypt_kernel(bp);
 
 	/*
 	 * Return the SME encryption mask (if SME is active) to be used as a
diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c
index d985cef3984f..56d99be3706a 100644
--- a/arch/x86/kernel/idt.c
+++ b/arch/x86/kernel/idt.c
@@ -56,7 +56,7 @@ struct idt_data {
  * Early traps running on the DEFAULT_STACK because the other interrupt
  * stacks work only after cpu_init().
  */
-static const __initdata struct idt_data early_idts[] = {
+static const __initconst struct idt_data early_idts[] = {
 	INTG(X86_TRAP_DB,		debug),
 	SYSG(X86_TRAP_BP,		int3),
 #ifdef CONFIG_X86_32
@@ -70,7 +70,7 @@ static const __initdata struct idt_data early_idts[] = {
  * the traps which use them are reinitialized with IST after cpu_init() has
  * set up TSS.
  */
-static const __initdata struct idt_data def_idts[] = {
+static const __initconst struct idt_data def_idts[] = {
 	INTG(X86_TRAP_DE,		divide_error),
 	INTG(X86_TRAP_NMI,		nmi),
 	INTG(X86_TRAP_BR,		bounds),
@@ -108,7 +108,7 @@ static const __initdata struct idt_data def_idts[] = {
 /*
  * The APIC and SMP idt entries
  */
-static const __initdata struct idt_data apic_idts[] = {
+static const __initconst struct idt_data apic_idts[] = {
 #ifdef CONFIG_SMP
 	INTG(RESCHEDULE_VECTOR,		reschedule_interrupt),
 	INTG(CALL_FUNCTION_VECTOR,	call_function_interrupt),
@@ -150,7 +150,7 @@ static const __initdata struct idt_data apic_idts[] = {
  * Early traps running on the DEFAULT_STACK because the other interrupt
  * stacks work only after cpu_init().
  */
-static const __initdata struct idt_data early_pf_idts[] = {
+static const __initconst struct idt_data early_pf_idts[] = {
 	INTG(X86_TRAP_PF,		page_fault),
 };
 
@@ -158,7 +158,7 @@ static const __initdata struct idt_data early_pf_idts[] = {
  * Override for the debug_idt. Same as the default, but with interrupt
  * stack set to DEFAULT_STACK (0). Required for NMI trap handling.
  */
-static const __initdata struct idt_data dbg_idts[] = {
+static const __initconst struct idt_data dbg_idts[] = {
 	INTG(X86_TRAP_DB,	debug),
 	INTG(X86_TRAP_BP,	int3),
 };
@@ -180,7 +180,7 @@ gate_desc debug_idt_table[IDT_ENTRIES] __page_aligned_bss;
  * The exceptions which use Interrupt stacks. They are setup after
  * cpu_init() when the TSS has been initialized.
  */
-static const __initdata struct idt_data ist_idts[] = {
+static const __initconst struct idt_data ist_idts[] = {
 	ISTG(X86_TRAP_DB,	debug,		DEBUG_STACK),
 	ISTG(X86_TRAP_NMI,	nmi,		NMI_STACK),
 	SISTG(X86_TRAP_BP,	int3,		DEBUG_STACK),
diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c
index 8da3e909e967..a539410c4ea9 100644
--- a/arch/x86/kernel/irqinit.c
+++ b/arch/x86/kernel/irqinit.c
@@ -61,6 +61,9 @@ void __init init_ISA_irqs(void)
 	struct irq_chip *chip = legacy_pic->chip;
 	int i;
 
+#if defined(CONFIG_X86_64) || defined(CONFIG_X86_LOCAL_APIC)
+	init_bsp_APIC();
+#endif
 	legacy_pic->init(0);
 
 	for (i = 0; i < nr_legacy_irqs(); i++)
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 145810b0edf6..68d7ab81c62f 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -364,16 +364,6 @@ static void __init reserve_initrd(void)
 	    !ramdisk_image || !ramdisk_size)
 		return;		/* No initrd provided by bootloader */
 
-	/*
-	 * If SME is active, this memory will be marked encrypted by the
-	 * kernel when it is accessed (including relocation). However, the
-	 * ramdisk image was loaded decrypted by the bootloader, so make
-	 * sure that it is encrypted before accessing it. For SEV the
-	 * ramdisk will already be encrypted, so only do this for SME.
-	 */
-	if (sme_active())
-		sme_early_encrypt(ramdisk_image, ramdisk_end - ramdisk_image);
-
 	initrd_start = 0;
 
 	mapped_size = memblock_mem_size(max_pfn_mapped);
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 8ea117f8142e..e169e85db434 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -602,7 +602,6 @@ unsigned long native_calibrate_tsc(void)
 		case INTEL_FAM6_KABYLAKE_DESKTOP:
 			crystal_khz = 24000;	/* 24.0 MHz */
 			break;
-		case INTEL_FAM6_SKYLAKE_X:
 		case INTEL_FAM6_ATOM_DENVERTON:
 			crystal_khz = 25000;	/* 25.0 MHz */
 			break;
@@ -612,6 +611,8 @@ unsigned long native_calibrate_tsc(void)
 		}
 	}
 
+	if (crystal_khz == 0)
+		return 0;
 	/*
 	 * TSC frequency determined by CPUID is a "hardware reported"
 	 * frequency and is the most accurate one so far we have. This
@@ -1315,6 +1316,12 @@ void __init tsc_init(void)
 		(unsigned long)cpu_khz / 1000,
 		(unsigned long)cpu_khz % 1000);
 
+	if (cpu_khz != tsc_khz) {
+		pr_info("Detected %lu.%03lu MHz TSC",
+			(unsigned long)tsc_khz / 1000,
+			(unsigned long)tsc_khz % 1000);
+	}
+
 	/* Sanitize TSC ADJUST before cyc2ns gets initialized */
 	tsc_store_and_check_tsc_adjust(true);
 
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 06fe3d51d385..b3e40773dce0 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -172,14 +172,15 @@ is_prefetch(struct pt_regs *regs, unsigned long error_code, unsigned long addr)
  * 6. T1   : reaches here, sees vma_pkey(vma)=5, when we really
  *	     faulted on a pte with its pkey=4.
  */
-static void fill_sig_info_pkey(int si_code, siginfo_t *info, u32 *pkey)
+static void fill_sig_info_pkey(int si_signo, int si_code, siginfo_t *info,
+		u32 *pkey)
 {
 	/* This is effectively an #ifdef */
 	if (!boot_cpu_has(X86_FEATURE_OSPKE))
 		return;
 
 	/* Fault not from Protection Keys: nothing to do */
-	if (si_code != SEGV_PKUERR)
+	if ((si_code != SEGV_PKUERR) || (si_signo != SIGSEGV))
 		return;
 	/*
 	 * force_sig_info_fault() is called from a number of
@@ -218,7 +219,7 @@ force_sig_info_fault(int si_signo, int si_code, unsigned long address,
 		lsb = PAGE_SHIFT;
 	info.si_addr_lsb = lsb;
 
-	fill_sig_info_pkey(si_code, &info, pkey);
+	fill_sig_info_pkey(si_signo, si_code, &info, pkey);
 
 	force_sig_info(si_signo, &info, tsk);
 }
diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c
index 391b13402e40..3ef362f598e3 100644
--- a/arch/x86/mm/mem_encrypt.c
+++ b/arch/x86/mm/mem_encrypt.c
@@ -464,37 +464,62 @@ void swiotlb_set_mem_attributes(void *vaddr, unsigned long size)
 	set_memory_decrypted((unsigned long)vaddr, size >> PAGE_SHIFT);
 }
 
-static void __init sme_clear_pgd(pgd_t *pgd_base, unsigned long start,
-				 unsigned long end)
+struct sme_populate_pgd_data {
+	void	*pgtable_area;
+	pgd_t	*pgd;
+
+	pmdval_t pmd_flags;
+	pteval_t pte_flags;
+	unsigned long paddr;
+
+	unsigned long vaddr;
+	unsigned long vaddr_end;
+};
+
+static void __init sme_clear_pgd(struct sme_populate_pgd_data *ppd)
 {
 	unsigned long pgd_start, pgd_end, pgd_size;
 	pgd_t *pgd_p;
 
-	pgd_start = start & PGDIR_MASK;
-	pgd_end = end & PGDIR_MASK;
+	pgd_start = ppd->vaddr & PGDIR_MASK;
+	pgd_end = ppd->vaddr_end & PGDIR_MASK;
 
-	pgd_size = (((pgd_end - pgd_start) / PGDIR_SIZE) + 1);
-	pgd_size *= sizeof(pgd_t);
+	pgd_size = (((pgd_end - pgd_start) / PGDIR_SIZE) + 1) * sizeof(pgd_t);
 
-	pgd_p = pgd_base + pgd_index(start);
+	pgd_p = ppd->pgd + pgd_index(ppd->vaddr);
 
 	memset(pgd_p, 0, pgd_size);
 }
 
-#define PGD_FLAGS	_KERNPG_TABLE_NOENC
-#define P4D_FLAGS	_KERNPG_TABLE_NOENC
-#define PUD_FLAGS	_KERNPG_TABLE_NOENC
-#define PMD_FLAGS	(__PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL)
+#define PGD_FLAGS		_KERNPG_TABLE_NOENC
+#define P4D_FLAGS		_KERNPG_TABLE_NOENC
+#define PUD_FLAGS		_KERNPG_TABLE_NOENC
+#define PMD_FLAGS		_KERNPG_TABLE_NOENC
+
+#define PMD_FLAGS_LARGE		(__PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL)
+
+#define PMD_FLAGS_DEC		PMD_FLAGS_LARGE
+#define PMD_FLAGS_DEC_WP	((PMD_FLAGS_DEC & ~_PAGE_CACHE_MASK) | \
+				 (_PAGE_PAT | _PAGE_PWT))
+
+#define PMD_FLAGS_ENC		(PMD_FLAGS_LARGE | _PAGE_ENC)
+
+#define PTE_FLAGS		(__PAGE_KERNEL_EXEC & ~_PAGE_GLOBAL)
+
+#define PTE_FLAGS_DEC		PTE_FLAGS
+#define PTE_FLAGS_DEC_WP	((PTE_FLAGS_DEC & ~_PAGE_CACHE_MASK) | \
+				 (_PAGE_PAT | _PAGE_PWT))
+
+#define PTE_FLAGS_ENC		(PTE_FLAGS | _PAGE_ENC)
 
-static void __init *sme_populate_pgd(pgd_t *pgd_base, void *pgtable_area,
-				     unsigned long vaddr, pmdval_t pmd_val)
+static pmd_t __init *sme_prepare_pgd(struct sme_populate_pgd_data *ppd)
 {
 	pgd_t *pgd_p;
 	p4d_t *p4d_p;
 	pud_t *pud_p;
 	pmd_t *pmd_p;
 
-	pgd_p = pgd_base + pgd_index(vaddr);
+	pgd_p = ppd->pgd + pgd_index(ppd->vaddr);
 	if (native_pgd_val(*pgd_p)) {
 		if (IS_ENABLED(CONFIG_X86_5LEVEL))
 			p4d_p = (p4d_t *)(native_pgd_val(*pgd_p) & ~PTE_FLAGS_MASK);
@@ -504,15 +529,15 @@ static void __init *sme_populate_pgd(pgd_t *pgd_base, void *pgtable_area,
 		pgd_t pgd;
 
 		if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
-			p4d_p = pgtable_area;
+			p4d_p = ppd->pgtable_area;
 			memset(p4d_p, 0, sizeof(*p4d_p) * PTRS_PER_P4D);
-			pgtable_area += sizeof(*p4d_p) * PTRS_PER_P4D;
+			ppd->pgtable_area += sizeof(*p4d_p) * PTRS_PER_P4D;
 
 			pgd = native_make_pgd((pgdval_t)p4d_p + PGD_FLAGS);
 		} else {
-			pud_p = pgtable_area;
+			pud_p = ppd->pgtable_area;
 			memset(pud_p, 0, sizeof(*pud_p) * PTRS_PER_PUD);
-			pgtable_area += sizeof(*pud_p) * PTRS_PER_PUD;
+			ppd->pgtable_area += sizeof(*pud_p) * PTRS_PER_PUD;
 
 			pgd = native_make_pgd((pgdval_t)pud_p + PGD_FLAGS);
 		}
@@ -520,58 +545,160 @@ static void __init *sme_populate_pgd(pgd_t *pgd_base, void *pgtable_area,
 	}
 
 	if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
-		p4d_p += p4d_index(vaddr);
+		p4d_p += p4d_index(ppd->vaddr);
 		if (native_p4d_val(*p4d_p)) {
 			pud_p = (pud_t *)(native_p4d_val(*p4d_p) & ~PTE_FLAGS_MASK);
 		} else {
 			p4d_t p4d;
 
-			pud_p = pgtable_area;
+			pud_p = ppd->pgtable_area;
 			memset(pud_p, 0, sizeof(*pud_p) * PTRS_PER_PUD);
-			pgtable_area += sizeof(*pud_p) * PTRS_PER_PUD;
+			ppd->pgtable_area += sizeof(*pud_p) * PTRS_PER_PUD;
 
 			p4d = native_make_p4d((pudval_t)pud_p + P4D_FLAGS);
 			native_set_p4d(p4d_p, p4d);
 		}
 	}
 
-	pud_p += pud_index(vaddr);
+	pud_p += pud_index(ppd->vaddr);
 	if (native_pud_val(*pud_p)) {
 		if (native_pud_val(*pud_p) & _PAGE_PSE)
-			goto out;
+			return NULL;
 
 		pmd_p = (pmd_t *)(native_pud_val(*pud_p) & ~PTE_FLAGS_MASK);
 	} else {
 		pud_t pud;
 
-		pmd_p = pgtable_area;
+		pmd_p = ppd->pgtable_area;
 		memset(pmd_p, 0, sizeof(*pmd_p) * PTRS_PER_PMD);
-		pgtable_area += sizeof(*pmd_p) * PTRS_PER_PMD;
+		ppd->pgtable_area += sizeof(*pmd_p) * PTRS_PER_PMD;
 
 		pud = native_make_pud((pmdval_t)pmd_p + PUD_FLAGS);
 		native_set_pud(pud_p, pud);
 	}
 
-	pmd_p += pmd_index(vaddr);
+	return pmd_p;
+}
+
+static void __init sme_populate_pgd_large(struct sme_populate_pgd_data *ppd)
+{
+	pmd_t *pmd_p;
+
+	pmd_p = sme_prepare_pgd(ppd);
+	if (!pmd_p)
+		return;
+
+	pmd_p += pmd_index(ppd->vaddr);
 	if (!native_pmd_val(*pmd_p) || !(native_pmd_val(*pmd_p) & _PAGE_PSE))
-		native_set_pmd(pmd_p, native_make_pmd(pmd_val));
+		native_set_pmd(pmd_p, native_make_pmd(ppd->paddr | ppd->pmd_flags));
+}
 
-out:
-	return pgtable_area;
+static void __init sme_populate_pgd(struct sme_populate_pgd_data *ppd)
+{
+	pmd_t *pmd_p;
+	pte_t *pte_p;
+
+	pmd_p = sme_prepare_pgd(ppd);
+	if (!pmd_p)
+		return;
+
+	pmd_p += pmd_index(ppd->vaddr);
+	if (native_pmd_val(*pmd_p)) {
+		if (native_pmd_val(*pmd_p) & _PAGE_PSE)
+			return;
+
+		pte_p = (pte_t *)(native_pmd_val(*pmd_p) & ~PTE_FLAGS_MASK);
+	} else {
+		pmd_t pmd;
+
+		pte_p = ppd->pgtable_area;
+		memset(pte_p, 0, sizeof(*pte_p) * PTRS_PER_PTE);
+		ppd->pgtable_area += sizeof(*pte_p) * PTRS_PER_PTE;
+
+		pmd = native_make_pmd((pteval_t)pte_p + PMD_FLAGS);
+		native_set_pmd(pmd_p, pmd);
+	}
+
+	pte_p += pte_index(ppd->vaddr);
+	if (!native_pte_val(*pte_p))
+		native_set_pte(pte_p, native_make_pte(ppd->paddr | ppd->pte_flags));
+}
+
+static void __init __sme_map_range_pmd(struct sme_populate_pgd_data *ppd)
+{
+	while (ppd->vaddr < ppd->vaddr_end) {
+		sme_populate_pgd_large(ppd);
+
+		ppd->vaddr += PMD_PAGE_SIZE;
+		ppd->paddr += PMD_PAGE_SIZE;
+	}
+}
+
+static void __init __sme_map_range_pte(struct sme_populate_pgd_data *ppd)
+{
+	while (ppd->vaddr < ppd->vaddr_end) {
+		sme_populate_pgd(ppd);
+
+		ppd->vaddr += PAGE_SIZE;
+		ppd->paddr += PAGE_SIZE;
+	}
+}
+
+static void __init __sme_map_range(struct sme_populate_pgd_data *ppd,
+				   pmdval_t pmd_flags, pteval_t pte_flags)
+{
+	unsigned long vaddr_end;
+
+	ppd->pmd_flags = pmd_flags;
+	ppd->pte_flags = pte_flags;
+
+	/* Save original end value since we modify the struct value */
+	vaddr_end = ppd->vaddr_end;
+
+	/* If start is not 2MB aligned, create PTE entries */
+	ppd->vaddr_end = ALIGN(ppd->vaddr, PMD_PAGE_SIZE);
+	__sme_map_range_pte(ppd);
+
+	/* Create PMD entries */
+	ppd->vaddr_end = vaddr_end & PMD_PAGE_MASK;
+	__sme_map_range_pmd(ppd);
+
+	/* If end is not 2MB aligned, create PTE entries */
+	ppd->vaddr_end = vaddr_end;
+	__sme_map_range_pte(ppd);
+}
+
+static void __init sme_map_range_encrypted(struct sme_populate_pgd_data *ppd)
+{
+	__sme_map_range(ppd, PMD_FLAGS_ENC, PTE_FLAGS_ENC);
+}
+
+static void __init sme_map_range_decrypted(struct sme_populate_pgd_data *ppd)
+{
+	__sme_map_range(ppd, PMD_FLAGS_DEC, PTE_FLAGS_DEC);
+}
+
+static void __init sme_map_range_decrypted_wp(struct sme_populate_pgd_data *ppd)
+{
+	__sme_map_range(ppd, PMD_FLAGS_DEC_WP, PTE_FLAGS_DEC_WP);
 }
 
 static unsigned long __init sme_pgtable_calc(unsigned long len)
 {
-	unsigned long p4d_size, pud_size, pmd_size;
+	unsigned long p4d_size, pud_size, pmd_size, pte_size;
 	unsigned long total;
 
 	/*
 	 * Perform a relatively simplistic calculation of the pagetable
-	 * entries that are needed. That mappings will be covered by 2MB
-	 * PMD entries so we can conservatively calculate the required
+	 * entries that are needed. Those mappings will be covered mostly
+	 * by 2MB PMD entries so we can conservatively calculate the required
 	 * number of P4D, PUD and PMD structures needed to perform the
-	 * mappings. Incrementing the count for each covers the case where
-	 * the addresses cross entries.
+	 * mappings.  For mappings that are not 2MB aligned, PTE mappings
+	 * would be needed for the start and end portion of the address range
+	 * that fall outside of the 2MB alignment.  This results in, at most,
+	 * two extra pages to hold PTE entries for each range that is mapped.
+	 * Incrementing the count for each covers the case where the addresses
+	 * cross entries.
 	 */
 	if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
 		p4d_size = (ALIGN(len, PGDIR_SIZE) / PGDIR_SIZE) + 1;
@@ -585,8 +712,9 @@ static unsigned long __init sme_pgtable_calc(unsigned long len)
 	}
 	pmd_size = (ALIGN(len, PUD_SIZE) / PUD_SIZE) + 1;
 	pmd_size *= sizeof(pmd_t) * PTRS_PER_PMD;
+	pte_size = 2 * sizeof(pte_t) * PTRS_PER_PTE;
 
-	total = p4d_size + pud_size + pmd_size;
+	total = p4d_size + pud_size + pmd_size + pte_size;
 
 	/*
 	 * Now calculate the added pagetable structures needed to populate
@@ -610,29 +738,29 @@ static unsigned long __init sme_pgtable_calc(unsigned long len)
 	return total;
 }
 
-void __init sme_encrypt_kernel(void)
+void __init sme_encrypt_kernel(struct boot_params *bp)
 {
 	unsigned long workarea_start, workarea_end, workarea_len;
 	unsigned long execute_start, execute_end, execute_len;
 	unsigned long kernel_start, kernel_end, kernel_len;
+	unsigned long initrd_start, initrd_end, initrd_len;
+	struct sme_populate_pgd_data ppd;
 	unsigned long pgtable_area_len;
-	unsigned long paddr, pmd_flags;
 	unsigned long decrypted_base;
-	void *pgtable_area;
-	pgd_t *pgd;
 
 	if (!sme_active())
 		return;
 
 	/*
-	 * Prepare for encrypting the kernel by building new pagetables with
-	 * the necessary attributes needed to encrypt the kernel in place.
+	 * Prepare for encrypting the kernel and initrd by building new
+	 * pagetables with the necessary attributes needed to encrypt the
+	 * kernel in place.
 	 *
 	 *   One range of virtual addresses will map the memory occupied
-	 *   by the kernel as encrypted.
+	 *   by the kernel and initrd as encrypted.
 	 *
 	 *   Another range of virtual addresses will map the memory occupied
-	 *   by the kernel as decrypted and write-protected.
+	 *   by the kernel and initrd as decrypted and write-protected.
 	 *
 	 *     The use of write-protect attribute will prevent any of the
 	 *     memory from being cached.
@@ -643,6 +771,20 @@ void __init sme_encrypt_kernel(void)
 	kernel_end = ALIGN(__pa_symbol(_end), PMD_PAGE_SIZE);
 	kernel_len = kernel_end - kernel_start;
 
+	initrd_start = 0;
+	initrd_end = 0;
+	initrd_len = 0;
+#ifdef CONFIG_BLK_DEV_INITRD
+	initrd_len = (unsigned long)bp->hdr.ramdisk_size |
+		     ((unsigned long)bp->ext_ramdisk_size << 32);
+	if (initrd_len) {
+		initrd_start = (unsigned long)bp->hdr.ramdisk_image |
+			       ((unsigned long)bp->ext_ramdisk_image << 32);
+		initrd_end = PAGE_ALIGN(initrd_start + initrd_len);
+		initrd_len = initrd_end - initrd_start;
+	}
+#endif
+
 	/* Set the encryption workarea to be immediately after the kernel */
 	workarea_start = kernel_end;
 
@@ -665,16 +807,21 @@ void __init sme_encrypt_kernel(void)
 	 */
 	pgtable_area_len = sizeof(pgd_t) * PTRS_PER_PGD;
 	pgtable_area_len += sme_pgtable_calc(execute_end - kernel_start) * 2;
+	if (initrd_len)
+		pgtable_area_len += sme_pgtable_calc(initrd_len) * 2;
 
 	/* PUDs and PMDs needed in the current pagetables for the workarea */
 	pgtable_area_len += sme_pgtable_calc(execute_len + pgtable_area_len);
 
 	/*
 	 * The total workarea includes the executable encryption area and
-	 * the pagetable area.
+	 * the pagetable area. The start of the workarea is already 2MB
+	 * aligned, align the end of the workarea on a 2MB boundary so that
+	 * we don't try to create/allocate PTE entries from the workarea
+	 * before it is mapped.
 	 */
 	workarea_len = execute_len + pgtable_area_len;
-	workarea_end = workarea_start + workarea_len;
+	workarea_end = ALIGN(workarea_start + workarea_len, PMD_PAGE_SIZE);
 
 	/*
 	 * Set the address to the start of where newly created pagetable
@@ -683,45 +830,30 @@ void __init sme_encrypt_kernel(void)
 	 * pagetables and when the new encrypted and decrypted kernel
 	 * mappings are populated.
 	 */
-	pgtable_area = (void *)execute_end;
+	ppd.pgtable_area = (void *)execute_end;
 
 	/*
 	 * Make sure the current pagetable structure has entries for
 	 * addressing the workarea.
 	 */
-	pgd = (pgd_t *)native_read_cr3_pa();
-	paddr = workarea_start;
-	while (paddr < workarea_end) {
-		pgtable_area = sme_populate_pgd(pgd, pgtable_area,
-						paddr,
-						paddr + PMD_FLAGS);
-
-		paddr += PMD_PAGE_SIZE;
-	}
+	ppd.pgd = (pgd_t *)native_read_cr3_pa();
+	ppd.paddr = workarea_start;
+	ppd.vaddr = workarea_start;
+	ppd.vaddr_end = workarea_end;
+	sme_map_range_decrypted(&ppd);
 
 	/* Flush the TLB - no globals so cr3 is enough */
 	native_write_cr3(__native_read_cr3());
 
 	/*
 	 * A new pagetable structure is being built to allow for the kernel
-	 * to be encrypted. It starts with an empty PGD that will then be
-	 * populated with new PUDs and PMDs as the encrypted and decrypted
-	 * kernel mappings are created.
+	 * and initrd to be encrypted. It starts with an empty PGD that will
+	 * then be populated with new PUDs and PMDs as the encrypted and
+	 * decrypted kernel mappings are created.
 	 */
-	pgd = pgtable_area;
-	memset(pgd, 0, sizeof(*pgd) * PTRS_PER_PGD);
-	pgtable_area += sizeof(*pgd) * PTRS_PER_PGD;
-
-	/* Add encrypted kernel (identity) mappings */
-	pmd_flags = PMD_FLAGS | _PAGE_ENC;
-	paddr = kernel_start;
-	while (paddr < kernel_end) {
-		pgtable_area = sme_populate_pgd(pgd, pgtable_area,
-						paddr,
-						paddr + pmd_flags);
-
-		paddr += PMD_PAGE_SIZE;
-	}
+	ppd.pgd = ppd.pgtable_area;
+	memset(ppd.pgd, 0, sizeof(pgd_t) * PTRS_PER_PGD);
+	ppd.pgtable_area += sizeof(pgd_t) * PTRS_PER_PGD;
 
 	/*
 	 * A different PGD index/entry must be used to get different
@@ -730,47 +862,79 @@ void __init sme_encrypt_kernel(void)
 	 * the base of the mapping.
 	 */
 	decrypted_base = (pgd_index(workarea_end) + 1) & (PTRS_PER_PGD - 1);
+	if (initrd_len) {
+		unsigned long check_base;
+
+		check_base = (pgd_index(initrd_end) + 1) & (PTRS_PER_PGD - 1);
+		decrypted_base = max(decrypted_base, check_base);
+	}
 	decrypted_base <<= PGDIR_SHIFT;
 
+	/* Add encrypted kernel (identity) mappings */
+	ppd.paddr = kernel_start;
+	ppd.vaddr = kernel_start;
+	ppd.vaddr_end = kernel_end;
+	sme_map_range_encrypted(&ppd);
+
 	/* Add decrypted, write-protected kernel (non-identity) mappings */
-	pmd_flags = (PMD_FLAGS & ~_PAGE_CACHE_MASK) | (_PAGE_PAT | _PAGE_PWT);
-	paddr = kernel_start;
-	while (paddr < kernel_end) {
-		pgtable_area = sme_populate_pgd(pgd, pgtable_area,
-						paddr + decrypted_base,
-						paddr + pmd_flags);
-
-		paddr += PMD_PAGE_SIZE;
+	ppd.paddr = kernel_start;
+	ppd.vaddr = kernel_start + decrypted_base;
+	ppd.vaddr_end = kernel_end + decrypted_base;
+	sme_map_range_decrypted_wp(&ppd);
+
+	if (initrd_len) {
+		/* Add encrypted initrd (identity) mappings */
+		ppd.paddr = initrd_start;
+		ppd.vaddr = initrd_start;
+		ppd.vaddr_end = initrd_end;
+		sme_map_range_encrypted(&ppd);
+		/*
+		 * Add decrypted, write-protected initrd (non-identity) mappings
+		 */
+		ppd.paddr = initrd_start;
+		ppd.vaddr = initrd_start + decrypted_base;
+		ppd.vaddr_end = initrd_end + decrypted_base;
+		sme_map_range_decrypted_wp(&ppd);
 	}
 
 	/* Add decrypted workarea mappings to both kernel mappings */
-	paddr = workarea_start;
-	while (paddr < workarea_end) {
-		pgtable_area = sme_populate_pgd(pgd, pgtable_area,
-						paddr,
-						paddr + PMD_FLAGS);
+	ppd.paddr = workarea_start;
+	ppd.vaddr = workarea_start;
+	ppd.vaddr_end = workarea_end;
+	sme_map_range_decrypted(&ppd);
 
-		pgtable_area = sme_populate_pgd(pgd, pgtable_area,
-						paddr + decrypted_base,
-						paddr + PMD_FLAGS);
-
-		paddr += PMD_PAGE_SIZE;
-	}
+	ppd.paddr = workarea_start;
+	ppd.vaddr = workarea_start + decrypted_base;
+	ppd.vaddr_end = workarea_end + decrypted_base;
+	sme_map_range_decrypted(&ppd);
 
 	/* Perform the encryption */
 	sme_encrypt_execute(kernel_start, kernel_start + decrypted_base,
-			    kernel_len, workarea_start, (unsigned long)pgd);
+			    kernel_len, workarea_start, (unsigned long)ppd.pgd);
+
+	if (initrd_len)
+		sme_encrypt_execute(initrd_start, initrd_start + decrypted_base,
+				    initrd_len, workarea_start,
+				    (unsigned long)ppd.pgd);
 
 	/*
 	 * At this point we are running encrypted.  Remove the mappings for
 	 * the decrypted areas - all that is needed for this is to remove
 	 * the PGD entry/entries.
 	 */
-	sme_clear_pgd(pgd, kernel_start + decrypted_base,
-		      kernel_end + decrypted_base);
+	ppd.vaddr = kernel_start + decrypted_base;
+	ppd.vaddr_end = kernel_end + decrypted_base;
+	sme_clear_pgd(&ppd);
+
+	if (initrd_len) {
+		ppd.vaddr = initrd_start + decrypted_base;
+		ppd.vaddr_end = initrd_end + decrypted_base;
+		sme_clear_pgd(&ppd);
+	}
 
-	sme_clear_pgd(pgd, workarea_start + decrypted_base,
-		      workarea_end + decrypted_base);
+	ppd.vaddr = workarea_start + decrypted_base;
+	ppd.vaddr_end = workarea_end + decrypted_base;
+	sme_clear_pgd(&ppd);
 
 	/* Flush the TLB - no globals so cr3 is enough */
 	native_write_cr3(__native_read_cr3());
diff --git a/arch/x86/mm/mem_encrypt_boot.S b/arch/x86/mm/mem_encrypt_boot.S
index 730e6d541df1..01f682cf77a8 100644
--- a/arch/x86/mm/mem_encrypt_boot.S
+++ b/arch/x86/mm/mem_encrypt_boot.S
@@ -22,9 +22,9 @@ ENTRY(sme_encrypt_execute)
 
 	/*
 	 * Entry parameters:
-	 *   RDI - virtual address for the encrypted kernel mapping
-	 *   RSI - virtual address for the decrypted kernel mapping
-	 *   RDX - length of kernel
+	 *   RDI - virtual address for the encrypted mapping
+	 *   RSI - virtual address for the decrypted mapping
+	 *   RDX - length to encrypt
 	 *   RCX - virtual address of the encryption workarea, including:
 	 *     - stack page (PAGE_SIZE)
 	 *     - encryption routine page (PAGE_SIZE)
@@ -41,9 +41,9 @@ ENTRY(sme_encrypt_execute)
 	addq	$PAGE_SIZE, %rax	/* Workarea encryption routine */
 
 	push	%r12
-	movq	%rdi, %r10		/* Encrypted kernel */
-	movq	%rsi, %r11		/* Decrypted kernel */
-	movq	%rdx, %r12		/* Kernel length */
+	movq	%rdi, %r10		/* Encrypted area */
+	movq	%rsi, %r11		/* Decrypted area */
+	movq	%rdx, %r12		/* Area length */
 
 	/* Copy encryption routine into the workarea */
 	movq	%rax, %rdi				/* Workarea encryption routine */
@@ -52,10 +52,10 @@ ENTRY(sme_encrypt_execute)
 	rep	movsb
 
 	/* Setup registers for call */
-	movq	%r10, %rdi		/* Encrypted kernel */
-	movq	%r11, %rsi		/* Decrypted kernel */
+	movq	%r10, %rdi		/* Encrypted area */
+	movq	%r11, %rsi		/* Decrypted area */
 	movq	%r8, %rdx		/* Pagetables used for encryption */
-	movq	%r12, %rcx		/* Kernel length */
+	movq	%r12, %rcx		/* Area length */
 	movq	%rax, %r8		/* Workarea encryption routine */
 	addq	$PAGE_SIZE, %r8		/* Workarea intermediate copy buffer */
 
@@ -71,7 +71,7 @@ ENDPROC(sme_encrypt_execute)
 
 ENTRY(__enc_copy)
 /*
- * Routine used to encrypt kernel.
+ * Routine used to encrypt memory in place.
  *   This routine must be run outside of the kernel proper since
  *   the kernel will be encrypted during the process. So this
  *   routine is defined here and then copied to an area outside
@@ -79,19 +79,19 @@ ENTRY(__enc_copy)
  *   during execution.
  *
  *   On entry the registers must be:
- *     RDI - virtual address for the encrypted kernel mapping
- *     RSI - virtual address for the decrypted kernel mapping
+ *     RDI - virtual address for the encrypted mapping
+ *     RSI - virtual address for the decrypted mapping
  *     RDX - address of the pagetables to use for encryption
- *     RCX - length of kernel
+ *     RCX - length of area
  *      R8 - intermediate copy buffer
  *
  *     RAX - points to this routine
  *
- * The kernel will be encrypted by copying from the non-encrypted
- * kernel space to an intermediate buffer and then copying from the
- * intermediate buffer back to the encrypted kernel space. The physical
- * addresses of the two kernel space mappings are the same which
- * results in the kernel being encrypted "in place".
+ * The area will be encrypted by copying from the non-encrypted
+ * memory space to an intermediate buffer and then copying from the
+ * intermediate buffer back to the encrypted memory space. The physical
+ * addresses of the two mappings are the same which results in the area
+ * being encrypted "in place".
  */
 	/* Enable the new page tables */
 	mov	%rdx, %cr3
@@ -103,47 +103,55 @@ ENTRY(__enc_copy)
 	orq	$X86_CR4_PGE, %rdx
 	mov	%rdx, %cr4
 
+	push	%r15
+	push	%r12
+
+	movq	%rcx, %r9		/* Save area length */
+	movq	%rdi, %r10		/* Save encrypted area address */
+	movq	%rsi, %r11		/* Save decrypted area address */
+
 	/* Set the PAT register PA5 entry to write-protect */
-	push	%rcx
 	movl	$MSR_IA32_CR_PAT, %ecx
 	rdmsr
-	push	%rdx			/* Save original PAT value */
+	mov	%rdx, %r15		/* Save original PAT value */
 	andl	$0xffff00ff, %edx	/* Clear PA5 */
 	orl	$0x00000500, %edx	/* Set PA5 to WP */
 	wrmsr
-	pop	%rdx			/* RDX contains original PAT value */
-	pop	%rcx
-
-	movq	%rcx, %r9		/* Save kernel length */
-	movq	%rdi, %r10		/* Save encrypted kernel address */
-	movq	%rsi, %r11		/* Save decrypted kernel address */
 
 	wbinvd				/* Invalidate any cache entries */
 
-	/* Copy/encrypt 2MB at a time */
+	/* Copy/encrypt up to 2MB at a time */
+	movq	$PMD_PAGE_SIZE, %r12
 1:
-	movq	%r11, %rsi		/* Source - decrypted kernel */
+	cmpq	%r12, %r9
+	jnb	2f
+	movq	%r9, %r12
+
+2:
+	movq	%r11, %rsi		/* Source - decrypted area */
 	movq	%r8, %rdi		/* Dest   - intermediate copy buffer */
-	movq	$PMD_PAGE_SIZE, %rcx	/* 2MB length */
+	movq	%r12, %rcx
 	rep	movsb
 
 	movq	%r8, %rsi		/* Source - intermediate copy buffer */
-	movq	%r10, %rdi		/* Dest   - encrypted kernel */
-	movq	$PMD_PAGE_SIZE, %rcx	/* 2MB length */
+	movq	%r10, %rdi		/* Dest   - encrypted area */
+	movq	%r12, %rcx
 	rep	movsb
 
-	addq	$PMD_PAGE_SIZE, %r11
-	addq	$PMD_PAGE_SIZE, %r10
-	subq	$PMD_PAGE_SIZE, %r9	/* Kernel length decrement */
+	addq	%r12, %r11
+	addq	%r12, %r10
+	subq	%r12, %r9		/* Kernel length decrement */
 	jnz	1b			/* Kernel length not zero? */
 
 	/* Restore PAT register */
-	push	%rdx			/* Save original PAT value */
 	movl	$MSR_IA32_CR_PAT, %ecx
 	rdmsr
-	pop	%rdx			/* Restore original PAT value */
+	mov	%r15, %rdx		/* Restore original PAT value */
 	wrmsr
 
+	pop	%r12
+	pop	%r15
+
 	ret
 .L__enc_copy_end:
 ENDPROC(__enc_copy)
diff --git a/include/linux/crash_core.h b/include/linux/crash_core.h
index 06097ef30449..b511f6d24b42 100644
--- a/include/linux/crash_core.h
+++ b/include/linux/crash_core.h
@@ -42,6 +42,8 @@ phys_addr_t paddr_vmcoreinfo_note(void);
 	vmcoreinfo_append_str("PAGESIZE=%ld\n", value)
 #define VMCOREINFO_SYMBOL(name) \
 	vmcoreinfo_append_str("SYMBOL(%s)=%lx\n", #name, (unsigned long)&name)
+#define VMCOREINFO_SYMBOL_ARRAY(name) \
+	vmcoreinfo_append_str("SYMBOL(%s)=%lx\n", #name, (unsigned long)name)
 #define VMCOREINFO_SIZE(name) \
 	vmcoreinfo_append_str("SIZE(%s)=%lu\n", #name, \
 			      (unsigned long)sizeof(name))
diff --git a/kernel/crash_core.c b/kernel/crash_core.c
index b3663896278e..4f63597c824d 100644
--- a/kernel/crash_core.c
+++ b/kernel/crash_core.c
@@ -410,7 +410,7 @@ static int __init crash_save_vmcoreinfo_init(void)
 	VMCOREINFO_SYMBOL(contig_page_data);
 #endif
 #ifdef CONFIG_SPARSEMEM
-	VMCOREINFO_SYMBOL(mem_section);
+	VMCOREINFO_SYMBOL_ARRAY(mem_section);
 	VMCOREINFO_LENGTH(mem_section, NR_SECTION_ROOTS);
 	VMCOREINFO_STRUCT_SIZE(mem_section);
 	VMCOREINFO_OFFSET(mem_section, section_mem_map);

^ permalink raw reply	[flat|nested] 539+ messages in thread

* [GIT PULL] x86 fixes
@ 2018-01-12 13:56 Ingo Molnar
  0 siblings, 0 replies; 539+ messages in thread
From: Ingo Molnar @ 2018-01-12 13:56 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Thomas Gleixner, H. Peter Anvin, Peter Zijlstra,
	Andrew Morton

Linus,

Please pull the latest x86-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus

   # HEAD: 9d0513d82f1a8fe17b41f113ac5922fa57dbaf5c x86/platform/intel-mid: Revert "Make 'bt_sfi_data' const"

There two pending (non-PTI) x86 fixes:

 - an Intel-MID crash fix
 - and an Intel microcode loader blacklist quirk to avoid a problematic revision.

 Thanks,

	Ingo

------------------>
Andy Shevchenko (1):
      x86/platform/intel-mid: Revert "Make 'bt_sfi_data' const"

Jia Zhang (1):
      x86/microcode/intel: Extend BDW late-loading with a revision check


 arch/x86/kernel/cpu/microcode/intel.c                 | 13 +++++++++++--
 arch/x86/platform/intel-mid/device_libs/platform_bt.c |  2 +-
 2 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c
index 8ccdca6d3f9e..d9e460fc7a3b 100644
--- a/arch/x86/kernel/cpu/microcode/intel.c
+++ b/arch/x86/kernel/cpu/microcode/intel.c
@@ -910,8 +910,17 @@ static bool is_blacklisted(unsigned int cpu)
 {
 	struct cpuinfo_x86 *c = &cpu_data(cpu);
 
-	if (c->x86 == 6 && c->x86_model == INTEL_FAM6_BROADWELL_X) {
-		pr_err_once("late loading on model 79 is disabled.\n");
+	/*
+	 * Late loading on model 79 with microcode revision less than 0x0b000021
+	 * may result in a system hang. This behavior is documented in item
+	 * BDF90, #334165 (Intel Xeon Processor E7-8800/4800 v4 Product Family).
+	 */
+	if (c->x86 == 6 &&
+	    c->x86_model == INTEL_FAM6_BROADWELL_X &&
+	    c->x86_mask == 0x01 &&
+	    c->microcode < 0x0b000021) {
+		pr_err_once("Erratum BDF90: late loading with revision < 0x0b000021 (0x%x) disabled.\n", c->microcode);
+		pr_err_once("Please consider either early loading through initrd/built-in or a potential BIOS update.\n");
 		return true;
 	}
 
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_bt.c b/arch/x86/platform/intel-mid/device_libs/platform_bt.c
index dc036e511f48..5a0483e7bf66 100644
--- a/arch/x86/platform/intel-mid/device_libs/platform_bt.c
+++ b/arch/x86/platform/intel-mid/device_libs/platform_bt.c
@@ -60,7 +60,7 @@ static int __init tng_bt_sfi_setup(struct bt_sfi_data *ddata)
 	return 0;
 }
 
-static const struct bt_sfi_data tng_bt_sfi_data __initdata = {
+static struct bt_sfi_data tng_bt_sfi_data __initdata = {
 	.setup	= tng_bt_sfi_setup,
 };
 

^ permalink raw reply	[flat|nested] 539+ messages in thread

* Re: [GIT PULL] x86 fixes
  2017-12-17 11:41       ` Thomas Gleixner
@ 2017-12-17 15:15         ` Borislav Petkov
  0 siblings, 0 replies; 539+ messages in thread
From: Borislav Petkov @ 2017-12-17 15:15 UTC (permalink / raw)
  To: Thomas Gleixner, Andy Lutomirski
  Cc: Ingo Molnar, Linus Torvalds, linux-kernel, H. Peter Anvin,
	Peter Zijlstra, Andrew Morton, Josh Poimboeuf

On Sun, Dec 17, 2017 at 12:41:42PM +0100, Thomas Gleixner wrote:
> > X86_BUG_CPU_SECURE_MODE_PTI should be added to DISABLED_FEATURES or
> > DISABLED_BUGS or whatever if it's not configured in, which will reduce
> > bloat.  Borislav, that's kind of up your alley, since I don't think
> > the appropriate mask even exists right now.
> 
> Will have a look.

Right, I converted X86_BUG_CPU_SECURE_MODE_PTI bug bit to an X86_FEATURE
bit and then the DISABLED_FEATURES thing should just work.

-- 
Regards/Gruss,
    Boris.

Good mailing practices for 400: avoid top-posting and trim the reply.

^ permalink raw reply	[flat|nested] 539+ messages in thread

* Re: [GIT PULL] x86 fixes
  2017-12-17  3:25     ` Andy Lutomirski
  2017-12-17  8:32       ` Ingo Molnar
@ 2017-12-17 11:41       ` Thomas Gleixner
  2017-12-17 15:15         ` Borislav Petkov
  1 sibling, 1 reply; 539+ messages in thread
From: Thomas Gleixner @ 2017-12-17 11:41 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: Ingo Molnar, Linus Torvalds, linux-kernel, H. Peter Anvin,
	Peter Zijlstra, Andrew Morton, Josh Poimboeuf, Borislav Petkov

On Sat, 16 Dec 2017, Andy Lutomirski wrote:
> On Fri, Dec 15, 2017 at 8:07 AM, Ingo Molnar <mingo@kernel.org> wrote:
> I few things I noticed in the PTI tree:
> 
> "x86/mm/pti: Map ESPFIX into user space" has a leftover pr_err().
> Sorry, my bad, I've spent *way* too long looking at this crap to
> retain my sanity.  Also, if you're feeling like being super tidy, the
> init/main.c change in their could be folded in to whatever patch adds
> pti_init() in the first place, but it doesn't really matter.

I split it out into a separate patch and got rid of all the ifdef mess
while at it.

> "x86/pti: Map the vsyscall page if needed" has a change to
> pgtable_64.h that could be folded into an earlier patch.  This is
> probably my fault for applying Dave Hansen's cleanup request to the
> wrong patch.

Done

> "x86/mm/64: Make a full PGD-entry size hole in the memory map" would
> benefit from a mention of "5-level" somewhere in the subject or
> changelog.

Will do

> In "x86/fixmap: Add debugstore entries to cpu_entry_area", I think the
> function "set_percpu_fixmap_ptes" is misnamed.  It should be something
> like "allocate_percpu_fixmap_ptes", perhaps,

Ok.

> and it should either warn
> or do nothing if the PTE is already present, I think.  As it stands,
> it's a wee bit dangerous.

Well, it's not more dangerous than all the other fixmap stuff we are
doing. The only difference is that it does not install a mapping, it just
makes sure that the PTE page is populated.

> X86_BUG_CPU_SECURE_MODE_PTI should be added to DISABLED_FEATURES or
> DISABLED_BUGS or whatever if it's not configured in, which will reduce
> bloat.  Borislav, that's kind of up your alley, since I don't think
> the appropriate mask even exists right now.

Will have a look.

Thanks,

	tglx

^ permalink raw reply	[flat|nested] 539+ messages in thread

* Re: [GIT PULL] x86 fixes
  2017-12-17  3:25     ` Andy Lutomirski
@ 2017-12-17  8:32       ` Ingo Molnar
  2017-12-17 11:41       ` Thomas Gleixner
  1 sibling, 0 replies; 539+ messages in thread
From: Ingo Molnar @ 2017-12-17  8:32 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: Linus Torvalds, linux-kernel, Thomas Gleixner, H. Peter Anvin,
	Peter Zijlstra, Andrew Morton, Josh Poimboeuf, Borislav Petkov


* Andy Lutomirski <luto@kernel.org> wrote:

> > Is this tree looking good to you standalone?
> 
> I think this stuff is looking okay, although I'm a bit mystified by
> the whole merge base thing.  But if the backporters and Linus like it,
> then whatever.

To explain the merge base thing: considering that v4.14 is an LTS kernel the PTI 
merge base thing is really mostly about keeping this commit count low:

  triton:~/tip> git log --no-merges --oneline v4.14..tip/WIP.x86/pti.base | wc -l
  63

Versus the closest upstream work-alike equivalent base tree, which is, roughly:

  triton:~/tip> git log --no-merges --oneline v4.14..99306dfc067e | wc -l
  1088

1088 is a lot of commits to backport, all sourced from early in the merge window - 
which would also require the identification of ~dozens of random followup fixes 
later in the merge window and the -rc process - which fixes might not even 
cherry-pick cleanly due to other interactions...

So the upstream price of the 'PTI merge base' is 9 cherry-picks, to avoid 
backporting either 1000+ commits to -stable (not workable to -stable folks, 
especially as such wide backports also tend to explode exponentially by pulling in 
random dependencies as you try to backport them further back) or the non-Git 
backporting of 100+ iffy low level x86 entry code commits to the LTS kernel (still 
a nightmare to both the -stable and us x86 maintainers).

Note that doing this also allows tip:x86/pti to remain on this almost-v4.14 base 
for a few more weeks after an upstream merge, so it can collect any eventual fixes 
and minor enhancements into a linear series of commits. This, considering the 
complexity of PTI, is good both for bisectability and for backporting.

So as long as the ~9 cherry-picks are cleanly structured and are explicitly 
marked, this is the best all around solution we could think of.

( The somewhat weird git-merge gynastics in the tree I sent to Linus are really 
  about merging specific versions of upstream that are content-equivalent with
  the cherry-picks, to avoid massive conflicts. v4.15 had various other changes to 
  the files affected by the cherry-picking. If done naively the cherry-picked tree 
  merge can generate over a dozen nasty conflicts. Doing it this way also creates 
  more confidence in the cherry-picked base tree itself: -stable folks can trust 
  it more because it will be 'obviously' equivalent to upstream as expressed by
  the conflict-free merge. )

I believe the fact that the PTI patches have already been successfully backported 
to v4.9, with only minor additional cherry-picking, demonstrates that this is the 
right approach.

> I few things I noticed in the PTI tree:
> 
> "x86/mm/pti: Map ESPFIX into user space" has a leftover pr_err().
> Sorry, my bad, I've spent *way* too long looking at this crap to
> retain my sanity.  Also, if you're feeling like being super tidy, the
> init/main.c change in their could be folded in to whatever patch adds
> pti_init() in the first place, but it doesn't really matter.
> 
> "x86/pti: Map the vsyscall page if needed" has a change to
> pgtable_64.h that could be folded into an earlier patch.  This is
> probably my fault for applying Dave Hansen's cleanup request to the
> wrong patch.
> 
> "x86/mm/64: Make a full PGD-entry size hole in the memory map" would
> benefit from a mention of "5-level" somewhere in the subject or
> changelog.
> 
> In "x86/fixmap: Add debugstore entries to cpu_entry_area", I think the
> function "set_percpu_fixmap_ptes" is misnamed.  It should be something
> like "allocate_percpu_fixmap_ptes", perhaps, and it should either warn
> or do nothing if the PTE is already present, I think.  As it stands,
> it's a wee bit dangerous.
> 
> X86_BUG_CPU_SECURE_MODE_PTI should be added to DISABLED_FEATURES or
> DISABLED_BUGS or whatever if it's not configured in, which will reduce
> bloat.  Borislav, that's kind of up your alley, since I don't think
> the appropriate mask even exists right now.
> 
> 
> 
> Anyway, I stuck a few minor fixups here:
> 
> https://git.kernel.org/pub/scm/linux/kernel/git/luto/linux.git/log/?h=x86/pti

Thanks, we'll integrate all of this!

Also note that regarding PTI LDT handling, our plan is to stick to the review 
feedback consensus: i.e. we'll apply your PTI LDT fixes as the initial approach, 
plus an optional series later on once PTI is upstream, with the VMA based bits by 
Thomas and PeterZ subject to a fresh round of thinking & evaluation. Even if we 
decide to do the VMA approach, I don't think that aspect of PTI will be 
backported.

This should further simplify logistics and offloads risks as well.

	Ingo

^ permalink raw reply	[flat|nested] 539+ messages in thread

* Re: [GIT PULL] x86 fixes
  2017-12-15 16:07   ` Ingo Molnar
@ 2017-12-17  3:25     ` Andy Lutomirski
  2017-12-17  8:32       ` Ingo Molnar
  2017-12-17 11:41       ` Thomas Gleixner
  0 siblings, 2 replies; 539+ messages in thread
From: Andy Lutomirski @ 2017-12-17  3:25 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Andy Lutomirski, Linus Torvalds, linux-kernel, Thomas Gleixner,
	H. Peter Anvin, Peter Zijlstra, Andrew Morton, Josh Poimboeuf,
	Borislav Petkov

On Fri, Dec 15, 2017 at 8:07 AM, Ingo Molnar <mingo@kernel.org> wrote:
>
> * Andy Lutomirski <luto@kernel.org> wrote:
>
>> On Fri, Dec 15, 2017 at 7:43 AM, Ingo Molnar <mingo@kernel.org> wrote:
>> > Linus,
>>
>> >
>> >  - two 5-level paging related fixes
>>
>> Which reminds me: can you grab this one, too?
>>
>> https://lkml.kernel.org/r/24c898b4f44fdf8c22d93703850fb384ef87cfdc.1513035461.git.luto@kernel.org
>
> Yeah, done - it's now in x86/urgent as:
>
>   c739f930be1d: x86/espfix/64: Fix espfix double-fault handling on 5-level systems
>
> Will push it out soon. Linus will get it with the next x86/urgent batch, as
> there's no production 5-level paging CPUs out there yet, right?
>
> I'm also picking up your pending PTI fixes/updates over the weekend, but I spent
> today mostly to make sure that the system call trampoline bits and its
> dependencies were robust in practice.
>
> Could you please have a good look at that tree:
>
>    git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git tmp.tmp
>
> This includes the preparatory merges and cherry-picks, and some other low risk
> preparatory bits related to PTI.
>
> Is this tree looking good to you standalone?

I think this stuff is looking okay, although I'm a bit mystified by
the whole merge base thing.  But if the backporters and Linus like it,
then whatever.

I few things I noticed in the PTI tree:

"x86/mm/pti: Map ESPFIX into user space" has a leftover pr_err().
Sorry, my bad, I've spent *way* too long looking at this crap to
retain my sanity.  Also, if you're feeling like being super tidy, the
init/main.c change in their could be folded in to whatever patch adds
pti_init() in the first place, but it doesn't really matter.

"x86/pti: Map the vsyscall page if needed" has a change to
pgtable_64.h that could be folded into an earlier patch.  This is
probably my fault for applying Dave Hansen's cleanup request to the
wrong patch.

"x86/mm/64: Make a full PGD-entry size hole in the memory map" would
benefit from a mention of "5-level" somewhere in the subject or
changelog.

In "x86/fixmap: Add debugstore entries to cpu_entry_area", I think the
function "set_percpu_fixmap_ptes" is misnamed.  It should be something
like "allocate_percpu_fixmap_ptes", perhaps, and it should either warn
or do nothing if the PTE is already present, I think.  As it stands,
it's a wee bit dangerous.

X86_BUG_CPU_SECURE_MODE_PTI should be added to DISABLED_FEATURES or
DISABLED_BUGS or whatever if it's not configured in, which will reduce
bloat.  Borislav, that's kind of up your alley, since I don't think
the appropriate mask even exists right now.



Anyway, I stuck a few minor fixups here:

https://git.kernel.org/pub/scm/linux/kernel/git/luto/linux.git/log/?h=x86/pti

^ permalink raw reply	[flat|nested] 539+ messages in thread

* Re: [GIT PULL] x86 fixes
  2017-12-15 15:50 ` Andy Lutomirski
@ 2017-12-15 16:07   ` Ingo Molnar
  2017-12-17  3:25     ` Andy Lutomirski
  0 siblings, 1 reply; 539+ messages in thread
From: Ingo Molnar @ 2017-12-15 16:07 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: Linus Torvalds, linux-kernel, Thomas Gleixner, H. Peter Anvin,
	Peter Zijlstra, Andrew Morton, Josh Poimboeuf, Borislav Petkov


* Andy Lutomirski <luto@kernel.org> wrote:

> On Fri, Dec 15, 2017 at 7:43 AM, Ingo Molnar <mingo@kernel.org> wrote:
> > Linus,
> 
> >
> >  - two 5-level paging related fixes
> 
> Which reminds me: can you grab this one, too?
> 
> https://lkml.kernel.org/r/24c898b4f44fdf8c22d93703850fb384ef87cfdc.1513035461.git.luto@kernel.org

Yeah, done - it's now in x86/urgent as:

  c739f930be1d: x86/espfix/64: Fix espfix double-fault handling on 5-level systems

Will push it out soon. Linus will get it with the next x86/urgent batch, as 
there's no production 5-level paging CPUs out there yet, right?

I'm also picking up your pending PTI fixes/updates over the weekend, but I spent 
today mostly to make sure that the system call trampoline bits and its 
dependencies were robust in practice.

Could you please have a good look at that tree:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git tmp.tmp

This includes the preparatory merges and cherry-picks, and some other low risk 
preparatory bits related to PTI.

Is this tree looking good to you standalone?

Note that the lack of runtime patching on non-Intel systems by default is 
intentionally not included at this stage, so that it's all simpler, and that we 
get more test coverage...

This tree is what I've been testing all day, including suspend/resume testing, and 
which I wanted to send to Linus later today as an RFC pull request, assuming 
there's no last minute showstoppers. (Right after I've seen the new Star Wars 
movie with my son - because priorities! ;-)

Thanks,

	Ingo

^ permalink raw reply	[flat|nested] 539+ messages in thread

* Re: [GIT PULL] x86 fixes
  2017-12-15 15:43 Ingo Molnar
@ 2017-12-15 15:50 ` Andy Lutomirski
  2017-12-15 16:07   ` Ingo Molnar
  0 siblings, 1 reply; 539+ messages in thread
From: Andy Lutomirski @ 2017-12-15 15:50 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Linus Torvalds, linux-kernel, Thomas Gleixner, H. Peter Anvin,
	Peter Zijlstra, Andrew Morton, Josh Poimboeuf, Andy Lutomirski,
	Borislav Petkov

On Fri, Dec 15, 2017 at 7:43 AM, Ingo Molnar <mingo@kernel.org> wrote:
> Linus,

>
>  - two 5-level paging related fixes

Which reminds me: can you grab this one, too?

https://lkml.kernel.org/r/24c898b4f44fdf8c22d93703850fb384ef87cfdc.1513035461.git.luto@kernel.org

^ permalink raw reply	[flat|nested] 539+ messages in thread

* [GIT PULL] x86 fixes
@ 2017-12-15 15:43 Ingo Molnar
  2017-12-15 15:50 ` Andy Lutomirski
  0 siblings, 1 reply; 539+ messages in thread
From: Ingo Molnar @ 2017-12-15 15:43 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Thomas Gleixner, H. Peter Anvin, Peter Zijlstra,
	Andrew Morton, Josh Poimboeuf, Andy Lutomirski, Borislav Petkov

Linus,

Please pull the latest x86-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus

   # HEAD: 215eada73e77ede7e15531d99f712481ddd429be objtool: Resync objtool's instruction decoder source code copy with the kernel's latest version

Misc fixes:

 - Fix the s2ram regression related to confusion around segment register 
   restoration, plus related cleanups that make the code more robust

 - a guess-unwinder Kconfig dependency fix

 - an isoimage build target fix for certain tool chain combinations

 - instruction decoder opcode map fixes+updates, and the syncing of the kernel 
   decoder headers to the objtool headers

 - a kmmio tracing fix

 - two 5-level paging related fixes

 - a topology enumeration fix on certain SMP systems

 Thanks,

	Ingo

------------------>
Andrey Ryabinin (1):
      x86/unwinder/guess: Prevent using CONFIG_UNWINDER_GUESS=y with CONFIG_STACKDEPOT=y

Andy Lutomirski (3):
      x86/power/64: Use struct desc_ptr for the IDT in struct saved_context
      x86/power/32: Move SYSENTER MSR restoration to fix_processor_context()
      x86/power: Make restore_processor_context() sane

Changbin Du (1):
      x86/build: Don't verify mtools configuration file for isoimage

Ingo Molnar (1):
      objtool: Resync objtool's instruction decoder source code copy with the kernel's latest version

Karol Herbst (1):
      x86/mm/kmmio: Fix mmiotrace for page unaligned addresses

Kirill A. Shutemov (2):
      x86/boot/compressed/64: Detect and handle 5-level paging at boot-time
      x86/boot/compressed/64: Print error if 5-level paging is not supported

Prarit Bhargava (1):
      x86/smpboot: Do not use smp_num_siblings in __max_logical_packages calculation

Randy Dunlap (1):
      x86/decoder: Fix and update the opcodes map


 arch/x86/Kconfig.debug                             |  1 +
 arch/x86/boot/compressed/Makefile                  |  1 +
 arch/x86/boot/compressed/head_64.S                 | 16 +++-
 arch/x86/boot/compressed/misc.c                    | 16 ++++
 arch/x86/boot/compressed/pgtable_64.c              | 28 ++++++
 arch/x86/boot/genimage.sh                          |  4 +-
 arch/x86/include/asm/suspend_32.h                  |  8 +-
 arch/x86/include/asm/suspend_64.h                  | 19 ++++-
 arch/x86/kernel/smpboot.c                          |  4 +-
 arch/x86/lib/x86-opcode-map.txt                    | 13 ++-
 arch/x86/mm/ioremap.c                              |  4 +-
 arch/x86/mm/kmmio.c                                | 12 +--
 arch/x86/power/cpu.c                               | 99 ++++++++++------------
 tools/objtool/arch/x86/insn/inat.h                 | 10 +++
 tools/objtool/arch/x86/insn/x86-opcode-map.txt     | 15 +++-
 .../perf/util/intel-pt-decoder/x86-opcode-map.txt  | 15 +++-
 16 files changed, 184 insertions(+), 81 deletions(-)
 create mode 100644 arch/x86/boot/compressed/pgtable_64.c

diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index 6293a8768a91..672441c008c7 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -400,6 +400,7 @@ config UNWINDER_FRAME_POINTER
 config UNWINDER_GUESS
 	bool "Guess unwinder"
 	depends on EXPERT
+	depends on !STACKDEPOT
 	---help---
 	  This option enables the "guess" unwinder for unwinding kernel stack
 	  traces.  It scans the stack and reports every kernel text address it
diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index 1e9c322e973a..f25e1530e064 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -80,6 +80,7 @@ vmlinux-objs-$(CONFIG_RANDOMIZE_BASE) += $(obj)/kaslr.o
 ifdef CONFIG_X86_64
 	vmlinux-objs-$(CONFIG_RANDOMIZE_BASE) += $(obj)/pagetable.o
 	vmlinux-objs-y += $(obj)/mem_encrypt.o
+	vmlinux-objs-y += $(obj)/pgtable_64.o
 endif
 
 $(obj)/eboot.o: KBUILD_CFLAGS += -fshort-wchar -mno-red-zone
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index 20919b4f3133..fc313e29fe2c 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -305,10 +305,18 @@ ENTRY(startup_64)
 	leaq	boot_stack_end(%rbx), %rsp
 
 #ifdef CONFIG_X86_5LEVEL
-	/* Check if 5-level paging has already enabled */
-	movq	%cr4, %rax
-	testl	$X86_CR4_LA57, %eax
-	jnz	lvl5
+	/*
+	 * Check if we need to enable 5-level paging.
+	 * RSI holds real mode data and need to be preserved across
+	 * a function call.
+	 */
+	pushq	%rsi
+	call	l5_paging_required
+	popq	%rsi
+
+	/* If l5_paging_required() returned zero, we're done here. */
+	cmpq	$0, %rax
+	je	lvl5
 
 	/*
 	 * At this point we are in long mode with 4-level paging enabled,
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index b50c42455e25..98761a1576ce 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -169,6 +169,16 @@ void __puthex(unsigned long value)
 	}
 }
 
+static bool l5_supported(void)
+{
+	/* Check if leaf 7 is supported. */
+	if (native_cpuid_eax(0) < 7)
+		return 0;
+
+	/* Check if la57 is supported. */
+	return native_cpuid_ecx(7) & (1 << (X86_FEATURE_LA57 & 31));
+}
+
 #if CONFIG_X86_NEED_RELOCS
 static void handle_relocations(void *output, unsigned long output_len,
 			       unsigned long virt_addr)
@@ -362,6 +372,12 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap,
 	console_init();
 	debug_putstr("early console in extract_kernel\n");
 
+	if (IS_ENABLED(CONFIG_X86_5LEVEL) && !l5_supported()) {
+		error("This linux kernel as configured requires 5-level paging\n"
+			"This CPU does not support the required 'cr4.la57' feature\n"
+			"Unable to boot - please use a kernel appropriate for your CPU\n");
+	}
+
 	free_mem_ptr     = heap;	/* Heap */
 	free_mem_end_ptr = heap + BOOT_HEAP_SIZE;
 
diff --git a/arch/x86/boot/compressed/pgtable_64.c b/arch/x86/boot/compressed/pgtable_64.c
new file mode 100644
index 000000000000..b4469a37e9a1
--- /dev/null
+++ b/arch/x86/boot/compressed/pgtable_64.c
@@ -0,0 +1,28 @@
+#include <asm/processor.h>
+
+/*
+ * __force_order is used by special_insns.h asm code to force instruction
+ * serialization.
+ *
+ * It is not referenced from the code, but GCC < 5 with -fPIE would fail
+ * due to an undefined symbol. Define it to make these ancient GCCs work.
+ */
+unsigned long __force_order;
+
+int l5_paging_required(void)
+{
+	/* Check if leaf 7 is supported. */
+
+	if (native_cpuid_eax(0) < 7)
+		return 0;
+
+	/* Check if la57 is supported. */
+	if (!(native_cpuid_ecx(7) & (1 << (X86_FEATURE_LA57 & 31))))
+		return 0;
+
+	/* Check if 5-level paging has already been enabled. */
+	if (native_read_cr4() & X86_CR4_LA57)
+		return 0;
+
+	return 1;
+}
diff --git a/arch/x86/boot/genimage.sh b/arch/x86/boot/genimage.sh
index 49f4970f693b..c9e8499fbfe7 100644
--- a/arch/x86/boot/genimage.sh
+++ b/arch/x86/boot/genimage.sh
@@ -44,9 +44,9 @@ FDINITRD=$6
 
 # Make sure the files actually exist
 verify "$FBZIMAGE"
-verify "$MTOOLSRC"
 
 genbzdisk() {
+	verify "$MTOOLSRC"
 	mformat a:
 	syslinux $FIMAGE
 	echo "$KCMDLINE" | mcopy - a:syslinux.cfg
@@ -57,6 +57,7 @@ genbzdisk() {
 }
 
 genfdimage144() {
+	verify "$MTOOLSRC"
 	dd if=/dev/zero of=$FIMAGE bs=1024 count=1440 2> /dev/null
 	mformat v:
 	syslinux $FIMAGE
@@ -68,6 +69,7 @@ genfdimage144() {
 }
 
 genfdimage288() {
+	verify "$MTOOLSRC"
 	dd if=/dev/zero of=$FIMAGE bs=1024 count=2880 2> /dev/null
 	mformat w:
 	syslinux $FIMAGE
diff --git a/arch/x86/include/asm/suspend_32.h b/arch/x86/include/asm/suspend_32.h
index 982c325dad33..8be6afb58471 100644
--- a/arch/x86/include/asm/suspend_32.h
+++ b/arch/x86/include/asm/suspend_32.h
@@ -12,7 +12,13 @@
 
 /* image of the saved processor state */
 struct saved_context {
-	u16 es, fs, gs, ss;
+	/*
+	 * On x86_32, all segment registers, with the possible exception of
+	 * gs, are saved at kernel entry in pt_regs.
+	 */
+#ifdef CONFIG_X86_32_LAZY_GS
+	u16 gs;
+#endif
 	unsigned long cr0, cr2, cr3, cr4;
 	u64 misc_enable;
 	bool misc_enable_saved;
diff --git a/arch/x86/include/asm/suspend_64.h b/arch/x86/include/asm/suspend_64.h
index 7306e911faee..a7af9f53c0cb 100644
--- a/arch/x86/include/asm/suspend_64.h
+++ b/arch/x86/include/asm/suspend_64.h
@@ -20,8 +20,20 @@
  */
 struct saved_context {
 	struct pt_regs regs;
-	u16 ds, es, fs, gs, ss;
-	unsigned long gs_base, gs_kernel_base, fs_base;
+
+	/*
+	 * User CS and SS are saved in current_pt_regs().  The rest of the
+	 * segment selectors need to be saved and restored here.
+	 */
+	u16 ds, es, fs, gs;
+
+	/*
+	 * Usermode FSBASE and GSBASE may not match the fs and gs selectors,
+	 * so we save them separately.  We save the kernelmode GSBASE to
+	 * restore percpu access after resume.
+	 */
+	unsigned long kernelmode_gs_base, usermode_gs_base, fs_base;
+
 	unsigned long cr0, cr2, cr3, cr4, cr8;
 	u64 misc_enable;
 	bool misc_enable_saved;
@@ -30,8 +42,7 @@ struct saved_context {
 	u16 gdt_pad; /* Unused */
 	struct desc_ptr gdt_desc;
 	u16 idt_pad;
-	u16 idt_limit;
-	unsigned long idt_base;
+	struct desc_ptr idt;
 	u16 ldt;
 	u16 tss;
 	unsigned long tr;
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 05a97d5fe298..35cb20994e32 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -106,7 +106,7 @@ EXPORT_SYMBOL(__max_logical_packages);
 static unsigned int logical_packages __read_mostly;
 
 /* Maximum number of SMT threads on any online core */
-int __max_smt_threads __read_mostly;
+int __read_mostly __max_smt_threads = 1;
 
 /* Flag to indicate if a complete sched domain rebuild is required */
 bool x86_topology_update;
@@ -1304,7 +1304,7 @@ void __init native_smp_cpus_done(unsigned int max_cpus)
 	 * Today neither Intel nor AMD support heterogenous systems so
 	 * extrapolate the boot cpu's data to all packages.
 	 */
-	ncpus = cpu_data(0).booted_cores * smp_num_siblings;
+	ncpus = cpu_data(0).booted_cores * topology_max_smt_threads();
 	__max_logical_packages = DIV_ROUND_UP(nr_cpu_ids, ncpus);
 	pr_info("Max logical packages: %u\n", __max_logical_packages);
 
diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt
index c4d55919fac1..e0b85930dd77 100644
--- a/arch/x86/lib/x86-opcode-map.txt
+++ b/arch/x86/lib/x86-opcode-map.txt
@@ -607,7 +607,7 @@ fb: psubq Pq,Qq | vpsubq Vx,Hx,Wx (66),(v1)
 fc: paddb Pq,Qq | vpaddb Vx,Hx,Wx (66),(v1)
 fd: paddw Pq,Qq | vpaddw Vx,Hx,Wx (66),(v1)
 fe: paddd Pq,Qq | vpaddd Vx,Hx,Wx (66),(v1)
-ff:
+ff: UD0
 EndTable
 
 Table: 3-byte opcode 1 (0x0f 0x38)
@@ -717,7 +717,7 @@ AVXcode: 2
 7e: vpermt2d/q Vx,Hx,Wx (66),(ev)
 7f: vpermt2ps/d Vx,Hx,Wx (66),(ev)
 80: INVEPT Gy,Mdq (66)
-81: INVPID Gy,Mdq (66)
+81: INVVPID Gy,Mdq (66)
 82: INVPCID Gy,Mdq (66)
 83: vpmultishiftqb Vx,Hx,Wx (66),(ev)
 88: vexpandps/d Vpd,Wpd (66),(ev)
@@ -970,6 +970,15 @@ GrpTable: Grp9
 EndTable
 
 GrpTable: Grp10
+# all are UD1
+0: UD1
+1: UD1
+2: UD1
+3: UD1
+4: UD1
+5: UD1
+6: UD1
+7: UD1
 EndTable
 
 # Grp11A and Grp11B are expressed as Grp11 in Intel SDM
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index 6e4573b1da34..c45b6ec5357b 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -404,11 +404,11 @@ void iounmap(volatile void __iomem *addr)
 		return;
 	}
 
+	mmiotrace_iounmap(addr);
+
 	addr = (volatile void __iomem *)
 		(PAGE_MASK & (unsigned long __force)addr);
 
-	mmiotrace_iounmap(addr);
-
 	/* Use the vm area unlocked, assuming the caller
 	   ensures there isn't another iounmap for the same address
 	   in parallel. Reuse of the virtual address is prevented by
diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c
index c21c2ed04612..58477ec3d66d 100644
--- a/arch/x86/mm/kmmio.c
+++ b/arch/x86/mm/kmmio.c
@@ -435,17 +435,18 @@ int register_kmmio_probe(struct kmmio_probe *p)
 	unsigned long flags;
 	int ret = 0;
 	unsigned long size = 0;
+	unsigned long addr = p->addr & PAGE_MASK;
 	const unsigned long size_lim = p->len + (p->addr & ~PAGE_MASK);
 	unsigned int l;
 	pte_t *pte;
 
 	spin_lock_irqsave(&kmmio_lock, flags);
-	if (get_kmmio_probe(p->addr)) {
+	if (get_kmmio_probe(addr)) {
 		ret = -EEXIST;
 		goto out;
 	}
 
-	pte = lookup_address(p->addr, &l);
+	pte = lookup_address(addr, &l);
 	if (!pte) {
 		ret = -EINVAL;
 		goto out;
@@ -454,7 +455,7 @@ int register_kmmio_probe(struct kmmio_probe *p)
 	kmmio_count++;
 	list_add_rcu(&p->list, &kmmio_probes);
 	while (size < size_lim) {
-		if (add_kmmio_fault_page(p->addr + size))
+		if (add_kmmio_fault_page(addr + size))
 			pr_err("Unable to set page fault.\n");
 		size += page_level_size(l);
 	}
@@ -528,19 +529,20 @@ void unregister_kmmio_probe(struct kmmio_probe *p)
 {
 	unsigned long flags;
 	unsigned long size = 0;
+	unsigned long addr = p->addr & PAGE_MASK;
 	const unsigned long size_lim = p->len + (p->addr & ~PAGE_MASK);
 	struct kmmio_fault_page *release_list = NULL;
 	struct kmmio_delayed_release *drelease;
 	unsigned int l;
 	pte_t *pte;
 
-	pte = lookup_address(p->addr, &l);
+	pte = lookup_address(addr, &l);
 	if (!pte)
 		return;
 
 	spin_lock_irqsave(&kmmio_lock, flags);
 	while (size < size_lim) {
-		release_kmmio_fault_page(p->addr + size, &release_list);
+		release_kmmio_fault_page(addr + size, &release_list);
 		size += page_level_size(l);
 	}
 	list_del_rcu(&p->list);
diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c
index 5191de14f4df..36a28eddb435 100644
--- a/arch/x86/power/cpu.c
+++ b/arch/x86/power/cpu.c
@@ -82,12 +82,8 @@ static void __save_processor_state(struct saved_context *ctxt)
 	/*
 	 * descriptor tables
 	 */
-#ifdef CONFIG_X86_32
 	store_idt(&ctxt->idt);
-#else
-/* CONFIG_X86_64 */
-	store_idt((struct desc_ptr *)&ctxt->idt_limit);
-#endif
+
 	/*
 	 * We save it here, but restore it only in the hibernate case.
 	 * For ACPI S3 resume, this is loaded via 'early_gdt_desc' in 64-bit
@@ -103,22 +99,18 @@ static void __save_processor_state(struct saved_context *ctxt)
 	/*
 	 * segment registers
 	 */
-#ifdef CONFIG_X86_32
-	savesegment(es, ctxt->es);
-	savesegment(fs, ctxt->fs);
+#ifdef CONFIG_X86_32_LAZY_GS
 	savesegment(gs, ctxt->gs);
-	savesegment(ss, ctxt->ss);
-#else
-/* CONFIG_X86_64 */
-	asm volatile ("movw %%ds, %0" : "=m" (ctxt->ds));
-	asm volatile ("movw %%es, %0" : "=m" (ctxt->es));
-	asm volatile ("movw %%fs, %0" : "=m" (ctxt->fs));
-	asm volatile ("movw %%gs, %0" : "=m" (ctxt->gs));
-	asm volatile ("movw %%ss, %0" : "=m" (ctxt->ss));
+#endif
+#ifdef CONFIG_X86_64
+	savesegment(gs, ctxt->gs);
+	savesegment(fs, ctxt->fs);
+	savesegment(ds, ctxt->ds);
+	savesegment(es, ctxt->es);
 
 	rdmsrl(MSR_FS_BASE, ctxt->fs_base);
-	rdmsrl(MSR_GS_BASE, ctxt->gs_base);
-	rdmsrl(MSR_KERNEL_GS_BASE, ctxt->gs_kernel_base);
+	rdmsrl(MSR_GS_BASE, ctxt->kernelmode_gs_base);
+	rdmsrl(MSR_KERNEL_GS_BASE, ctxt->usermode_gs_base);
 	mtrr_save_fixed_ranges(NULL);
 
 	rdmsrl(MSR_EFER, ctxt->efer);
@@ -178,6 +170,9 @@ static void fix_processor_context(void)
 	write_gdt_entry(desc, GDT_ENTRY_TSS, &tss, DESC_TSS);
 
 	syscall_init();				/* This sets MSR_*STAR and related */
+#else
+	if (boot_cpu_has(X86_FEATURE_SEP))
+		enable_sep_cpu();
 #endif
 	load_TR_desc();				/* This does ltr */
 	load_mm_ldt(current->active_mm);	/* This does lldt */
@@ -190,9 +185,12 @@ static void fix_processor_context(void)
 }
 
 /**
- *	__restore_processor_state - restore the contents of CPU registers saved
- *		by __save_processor_state()
- *	@ctxt - structure to load the registers contents from
+ * __restore_processor_state - restore the contents of CPU registers saved
+ *                             by __save_processor_state()
+ * @ctxt - structure to load the registers contents from
+ *
+ * The asm code that gets us here will have restored a usable GDT, although
+ * it will be pointing to the wrong alias.
  */
 static void notrace __restore_processor_state(struct saved_context *ctxt)
 {
@@ -215,57 +213,50 @@ static void notrace __restore_processor_state(struct saved_context *ctxt)
 	write_cr2(ctxt->cr2);
 	write_cr0(ctxt->cr0);
 
+	/* Restore the IDT. */
+	load_idt(&ctxt->idt);
+
 	/*
-	 * now restore the descriptor tables to their proper values
-	 * ltr is done i fix_processor_context().
+	 * Just in case the asm code got us here with the SS, DS, or ES
+	 * out of sync with the GDT, update them.
 	 */
-#ifdef CONFIG_X86_32
-	load_idt(&ctxt->idt);
-#else
-/* CONFIG_X86_64 */
-	load_idt((const struct desc_ptr *)&ctxt->idt_limit);
-#endif
+	loadsegment(ss, __KERNEL_DS);
+	loadsegment(ds, __USER_DS);
+	loadsegment(es, __USER_DS);
 
-#ifdef CONFIG_X86_64
 	/*
-	 * We need GSBASE restored before percpu access can work.
-	 * percpu access can happen in exception handlers or in complicated
-	 * helpers like load_gs_index().
+	 * Restore percpu access.  Percpu access can happen in exception
+	 * handlers or in complicated helpers like load_gs_index().
 	 */
-	wrmsrl(MSR_GS_BASE, ctxt->gs_base);
+#ifdef CONFIG_X86_64
+	wrmsrl(MSR_GS_BASE, ctxt->kernelmode_gs_base);
+#else
+	loadsegment(fs, __KERNEL_PERCPU);
+	loadsegment(gs, __KERNEL_STACK_CANARY);
 #endif
 
+	/* Restore the TSS, RO GDT, LDT, and usermode-relevant MSRs. */
 	fix_processor_context();
 
 	/*
-	 * Restore segment registers.  This happens after restoring the GDT
-	 * and LDT, which happen in fix_processor_context().
+	 * Now that we have descriptor tables fully restored and working
+	 * exception handling, restore the usermode segments.
 	 */
-#ifdef CONFIG_X86_32
+#ifdef CONFIG_X86_64
+	loadsegment(ds, ctxt->es);
 	loadsegment(es, ctxt->es);
 	loadsegment(fs, ctxt->fs);
-	loadsegment(gs, ctxt->gs);
-	loadsegment(ss, ctxt->ss);
-
-	/*
-	 * sysenter MSRs
-	 */
-	if (boot_cpu_has(X86_FEATURE_SEP))
-		enable_sep_cpu();
-#else
-/* CONFIG_X86_64 */
-	asm volatile ("movw %0, %%ds" :: "r" (ctxt->ds));
-	asm volatile ("movw %0, %%es" :: "r" (ctxt->es));
-	asm volatile ("movw %0, %%fs" :: "r" (ctxt->fs));
 	load_gs_index(ctxt->gs);
-	asm volatile ("movw %0, %%ss" :: "r" (ctxt->ss));
 
 	/*
-	 * Restore FSBASE and user GSBASE after reloading the respective
-	 * segment selectors.
+	 * Restore FSBASE and GSBASE after restoring the selectors, since
+	 * restoring the selectors clobbers the bases.  Keep in mind
+	 * that MSR_KERNEL_GS_BASE is horribly misnamed.
 	 */
 	wrmsrl(MSR_FS_BASE, ctxt->fs_base);
-	wrmsrl(MSR_KERNEL_GS_BASE, ctxt->gs_kernel_base);
+	wrmsrl(MSR_KERNEL_GS_BASE, ctxt->usermode_gs_base);
+#elif defined(CONFIG_X86_32_LAZY_GS)
+	loadsegment(gs, ctxt->gs);
 #endif
 
 	do_fpu_end();
diff --git a/tools/objtool/arch/x86/insn/inat.h b/tools/objtool/arch/x86/insn/inat.h
index 125ecd2a300d..52dc8d911173 100644
--- a/tools/objtool/arch/x86/insn/inat.h
+++ b/tools/objtool/arch/x86/insn/inat.h
@@ -97,6 +97,16 @@
 #define INAT_MAKE_GROUP(grp)	((grp << INAT_GRP_OFFS) | INAT_MODRM)
 #define INAT_MAKE_IMM(imm)	(imm << INAT_IMM_OFFS)
 
+/* Identifiers for segment registers */
+#define INAT_SEG_REG_IGNORE	0
+#define INAT_SEG_REG_DEFAULT	1
+#define INAT_SEG_REG_CS		2
+#define INAT_SEG_REG_SS		3
+#define INAT_SEG_REG_DS		4
+#define INAT_SEG_REG_ES		5
+#define INAT_SEG_REG_FS		6
+#define INAT_SEG_REG_GS		7
+
 /* Attribute search APIs */
 extern insn_attr_t inat_get_opcode_attribute(insn_byte_t opcode);
 extern int inat_get_last_prefix_id(insn_byte_t last_pfx);
diff --git a/tools/objtool/arch/x86/insn/x86-opcode-map.txt b/tools/objtool/arch/x86/insn/x86-opcode-map.txt
index 12e377184ee4..e0b85930dd77 100644
--- a/tools/objtool/arch/x86/insn/x86-opcode-map.txt
+++ b/tools/objtool/arch/x86/insn/x86-opcode-map.txt
@@ -607,7 +607,7 @@ fb: psubq Pq,Qq | vpsubq Vx,Hx,Wx (66),(v1)
 fc: paddb Pq,Qq | vpaddb Vx,Hx,Wx (66),(v1)
 fd: paddw Pq,Qq | vpaddw Vx,Hx,Wx (66),(v1)
 fe: paddd Pq,Qq | vpaddd Vx,Hx,Wx (66),(v1)
-ff:
+ff: UD0
 EndTable
 
 Table: 3-byte opcode 1 (0x0f 0x38)
@@ -717,7 +717,7 @@ AVXcode: 2
 7e: vpermt2d/q Vx,Hx,Wx (66),(ev)
 7f: vpermt2ps/d Vx,Hx,Wx (66),(ev)
 80: INVEPT Gy,Mdq (66)
-81: INVPID Gy,Mdq (66)
+81: INVVPID Gy,Mdq (66)
 82: INVPCID Gy,Mdq (66)
 83: vpmultishiftqb Vx,Hx,Wx (66),(ev)
 88: vexpandps/d Vpd,Wpd (66),(ev)
@@ -896,7 +896,7 @@ EndTable
 
 GrpTable: Grp3_1
 0: TEST Eb,Ib
-1:
+1: TEST Eb,Ib
 2: NOT Eb
 3: NEG Eb
 4: MUL AL,Eb
@@ -970,6 +970,15 @@ GrpTable: Grp9
 EndTable
 
 GrpTable: Grp10
+# all are UD1
+0: UD1
+1: UD1
+2: UD1
+3: UD1
+4: UD1
+5: UD1
+6: UD1
+7: UD1
 EndTable
 
 # Grp11A and Grp11B are expressed as Grp11 in Intel SDM
diff --git a/tools/perf/util/intel-pt-decoder/x86-opcode-map.txt b/tools/perf/util/intel-pt-decoder/x86-opcode-map.txt
index 12e377184ee4..e0b85930dd77 100644
--- a/tools/perf/util/intel-pt-decoder/x86-opcode-map.txt
+++ b/tools/perf/util/intel-pt-decoder/x86-opcode-map.txt
@@ -607,7 +607,7 @@ fb: psubq Pq,Qq | vpsubq Vx,Hx,Wx (66),(v1)
 fc: paddb Pq,Qq | vpaddb Vx,Hx,Wx (66),(v1)
 fd: paddw Pq,Qq | vpaddw Vx,Hx,Wx (66),(v1)
 fe: paddd Pq,Qq | vpaddd Vx,Hx,Wx (66),(v1)
-ff:
+ff: UD0
 EndTable
 
 Table: 3-byte opcode 1 (0x0f 0x38)
@@ -717,7 +717,7 @@ AVXcode: 2
 7e: vpermt2d/q Vx,Hx,Wx (66),(ev)
 7f: vpermt2ps/d Vx,Hx,Wx (66),(ev)
 80: INVEPT Gy,Mdq (66)
-81: INVPID Gy,Mdq (66)
+81: INVVPID Gy,Mdq (66)
 82: INVPCID Gy,Mdq (66)
 83: vpmultishiftqb Vx,Hx,Wx (66),(ev)
 88: vexpandps/d Vpd,Wpd (66),(ev)
@@ -896,7 +896,7 @@ EndTable
 
 GrpTable: Grp3_1
 0: TEST Eb,Ib
-1:
+1: TEST Eb,Ib
 2: NOT Eb
 3: NEG Eb
 4: MUL AL,Eb
@@ -970,6 +970,15 @@ GrpTable: Grp9
 EndTable
 
 GrpTable: Grp10
+# all are UD1
+0: UD1
+1: UD1
+2: UD1
+3: UD1
+4: UD1
+5: UD1
+6: UD1
+7: UD1
 EndTable
 
 # Grp11A and Grp11B are expressed as Grp11 in Intel SDM

^ permalink raw reply	[flat|nested] 539+ messages in thread

* [GIT PULL] x86 fixes
@ 2017-12-06 22:36 Ingo Molnar
  0 siblings, 0 replies; 539+ messages in thread
From: Ingo Molnar @ 2017-12-06 22:36 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Thomas Gleixner, H. Peter Anvin, Peter Zijlstra,
	Borislav Petkov, Andrew Morton

Linus,

Please pull the latest x86-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus

   # HEAD: 88edb57d1e0b262e669c5cad36646dcf5a7f37f5 x86/vdso: Change time() prototype to match __vdso_time()

Misc fixes:

 - Make CR4 handling irq-safe, which bug vmware guests ran into
 - Don't crash on early IRQs in Xen guests
 - Don't crash secondary CPU bringup if #UD assisted WARN()ings are triggered
 - Make X86_BUG_FXSAVE_LEAK optional on newer AMD CPUs that have the fix
 - Fix AMD Fam17h microcode loading
 - Fix broadcom_postcore_init() if ACPI is disabled
 - Fix resume regression in __restore_processor_context()
 - Fix Sparse warnings
 - Fix a GCC-8 warning

 Thanks,

	Ingo

------------------>
Andy Lutomirski (1):
      x86/power: Fix some ordering bugs in __restore_processor_context()

Arnd Bergmann (1):
      x86/vdso: Change time() prototype to match __vdso_time()

Chunyu Hu (1):
      x86/idt: Load idt early in start_secondary

Colin Ian King (1):
      x86: Fix Sparse warnings about non-static functions

Juergen Gross (1):
      x86/xen: Support early interrupts in xen pv guests

Nadav Amit (2):
      x86/tlb: Refactor CR4 setting and shadow write
      x86/tlb: Disable interrupts when changing CR4

Rafael J. Wysocki (1):
      x86/PCI: Make broadcom_postcore_init() check acpi_disabled

Rudolf Marek (1):
      x86/cpufeatures: Make X86_BUG_FXSAVE_LEAK detectable in CPUID on AMD

Tom Lendacky (1):
      x86/microcode/AMD: Add support for fam17h microcode loading


 arch/x86/entry/vdso/vclock_gettime.c |  2 +-
 arch/x86/include/asm/cpufeatures.h   |  1 +
 arch/x86/include/asm/segment.h       | 12 ++++++++++++
 arch/x86/include/asm/tlbflush.h      | 35 ++++++++++++++++++----------------
 arch/x86/kernel/apic/vector.c        |  4 ++--
 arch/x86/kernel/cpu/amd.c            |  7 +++++--
 arch/x86/kernel/cpu/microcode/amd.c  |  4 ++++
 arch/x86/kernel/process.c            |  2 +-
 arch/x86/kernel/smpboot.c            |  2 +-
 arch/x86/mm/extable.c                |  4 +++-
 arch/x86/pci/broadcom_bus.c          |  2 +-
 arch/x86/platform/uv/uv_nmi.c        |  4 ++--
 arch/x86/power/cpu.c                 | 21 ++++++++++++++++----
 arch/x86/xen/enlighten_pv.c          | 37 +++++++++++++++++++++++-------------
 arch/x86/xen/xen-asm_64.S            | 14 ++++++++++++++
 15 files changed, 107 insertions(+), 44 deletions(-)

diff --git a/arch/x86/entry/vdso/vclock_gettime.c b/arch/x86/entry/vdso/vclock_gettime.c
index 11b13c4b43d5..f19856d95c60 100644
--- a/arch/x86/entry/vdso/vclock_gettime.c
+++ b/arch/x86/entry/vdso/vclock_gettime.c
@@ -324,5 +324,5 @@ notrace time_t __vdso_time(time_t *t)
 		*t = result;
 	return result;
 }
-int time(time_t *t)
+time_t time(time_t *t)
 	__attribute__((weak, alias("__vdso_time")));
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index c0b0e9e8aa66..800104c8a3ed 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -266,6 +266,7 @@
 /* AMD-defined CPU features, CPUID level 0x80000008 (EBX), word 13 */
 #define X86_FEATURE_CLZERO		(13*32+ 0) /* CLZERO instruction */
 #define X86_FEATURE_IRPERF		(13*32+ 1) /* Instructions Retired Count */
+#define X86_FEATURE_XSAVEERPTR		(13*32+ 2) /* Always save/restore FP error pointers */
 
 /* Thermal and Power Management Leaf, CPUID level 0x00000006 (EAX), word 14 */
 #define X86_FEATURE_DTHERM		(14*32+ 0) /* Digital Thermal Sensor */
diff --git a/arch/x86/include/asm/segment.h b/arch/x86/include/asm/segment.h
index b20f9d623f9c..8f09012b92e7 100644
--- a/arch/x86/include/asm/segment.h
+++ b/arch/x86/include/asm/segment.h
@@ -236,11 +236,23 @@
  */
 #define EARLY_IDT_HANDLER_SIZE 9
 
+/*
+ * xen_early_idt_handler_array is for Xen pv guests: for each entry in
+ * early_idt_handler_array it contains a prequel in the form of
+ * pop %rcx; pop %r11; jmp early_idt_handler_array[i]; summing up to
+ * max 8 bytes.
+ */
+#define XEN_EARLY_IDT_HANDLER_SIZE 8
+
 #ifndef __ASSEMBLY__
 
 extern const char early_idt_handler_array[NUM_EXCEPTION_VECTORS][EARLY_IDT_HANDLER_SIZE];
 extern void early_ignore_irq(void);
 
+#if defined(CONFIG_X86_64) && defined(CONFIG_XEN_PV)
+extern const char xen_early_idt_handler_array[NUM_EXCEPTION_VECTORS][XEN_EARLY_IDT_HANDLER_SIZE];
+#endif
+
 /*
  * Load a segment. Fall back on loading the zero segment if something goes
  * wrong.  This variant assumes that loading zero fully clears the segment.
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index 509046cfa5ce..877b5c1a1b12 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -173,40 +173,43 @@ static inline void cr4_init_shadow(void)
 	this_cpu_write(cpu_tlbstate.cr4, __read_cr4());
 }
 
+static inline void __cr4_set(unsigned long cr4)
+{
+	lockdep_assert_irqs_disabled();
+	this_cpu_write(cpu_tlbstate.cr4, cr4);
+	__write_cr4(cr4);
+}
+
 /* Set in this cpu's CR4. */
 static inline void cr4_set_bits(unsigned long mask)
 {
-	unsigned long cr4;
+	unsigned long cr4, flags;
 
+	local_irq_save(flags);
 	cr4 = this_cpu_read(cpu_tlbstate.cr4);
-	if ((cr4 | mask) != cr4) {
-		cr4 |= mask;
-		this_cpu_write(cpu_tlbstate.cr4, cr4);
-		__write_cr4(cr4);
-	}
+	if ((cr4 | mask) != cr4)
+		__cr4_set(cr4 | mask);
+	local_irq_restore(flags);
 }
 
 /* Clear in this cpu's CR4. */
 static inline void cr4_clear_bits(unsigned long mask)
 {
-	unsigned long cr4;
+	unsigned long cr4, flags;
 
+	local_irq_save(flags);
 	cr4 = this_cpu_read(cpu_tlbstate.cr4);
-	if ((cr4 & ~mask) != cr4) {
-		cr4 &= ~mask;
-		this_cpu_write(cpu_tlbstate.cr4, cr4);
-		__write_cr4(cr4);
-	}
+	if ((cr4 & ~mask) != cr4)
+		__cr4_set(cr4 & ~mask);
+	local_irq_restore(flags);
 }
 
-static inline void cr4_toggle_bits(unsigned long mask)
+static inline void cr4_toggle_bits_irqsoff(unsigned long mask)
 {
 	unsigned long cr4;
 
 	cr4 = this_cpu_read(cpu_tlbstate.cr4);
-	cr4 ^= mask;
-	this_cpu_write(cpu_tlbstate.cr4, cr4);
-	__write_cr4(cr4);
+	__cr4_set(cr4 ^ mask);
 }
 
 /* Read the CR4 shadow. */
diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index 6a823a25eaff..750449152b04 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -542,8 +542,8 @@ static int x86_vector_alloc_irqs(struct irq_domain *domain, unsigned int virq,
 }
 
 #ifdef CONFIG_GENERIC_IRQ_DEBUGFS
-void x86_vector_debug_show(struct seq_file *m, struct irq_domain *d,
-			   struct irq_data *irqd, int ind)
+static void x86_vector_debug_show(struct seq_file *m, struct irq_domain *d,
+				  struct irq_data *irqd, int ind)
 {
 	unsigned int cpu, vector, prev_cpu, prev_vector;
 	struct apic_chip_data *apicd;
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index d58184b7cd44..bcb75dc97d44 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -804,8 +804,11 @@ static void init_amd(struct cpuinfo_x86 *c)
 	case 0x17: init_amd_zn(c); break;
 	}
 
-	/* Enable workaround for FXSAVE leak */
-	if (c->x86 >= 6)
+	/*
+	 * Enable workaround for FXSAVE leak on CPUs
+	 * without a XSaveErPtr feature
+	 */
+	if ((c->x86 >= 6) && (!cpu_has(c, X86_FEATURE_XSAVEERPTR)))
 		set_cpu_bug(c, X86_BUG_FXSAVE_LEAK);
 
 	cpu_detect_cache_sizes(c);
diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c
index c6daec4bdba5..330b8462d426 100644
--- a/arch/x86/kernel/cpu/microcode/amd.c
+++ b/arch/x86/kernel/cpu/microcode/amd.c
@@ -470,6 +470,7 @@ static unsigned int verify_patch_size(u8 family, u32 patch_size,
 #define F14H_MPB_MAX_SIZE 1824
 #define F15H_MPB_MAX_SIZE 4096
 #define F16H_MPB_MAX_SIZE 3458
+#define F17H_MPB_MAX_SIZE 3200
 
 	switch (family) {
 	case 0x14:
@@ -481,6 +482,9 @@ static unsigned int verify_patch_size(u8 family, u32 patch_size,
 	case 0x16:
 		max_size = F16H_MPB_MAX_SIZE;
 		break;
+	case 0x17:
+		max_size = F17H_MPB_MAX_SIZE;
+		break;
 	default:
 		max_size = F1XH_MPB_MAX_SIZE;
 		break;
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 97fb3e5737f5..bb988a24db92 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -299,7 +299,7 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
 	}
 
 	if ((tifp ^ tifn) & _TIF_NOTSC)
-		cr4_toggle_bits(X86_CR4_TSD);
+		cr4_toggle_bits_irqsoff(X86_CR4_TSD);
 
 	if ((tifp ^ tifn) & _TIF_NOCPUID)
 		set_cpuid_faulting(!!(tifn & _TIF_NOCPUID));
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 3d01df7d7cf6..05a97d5fe298 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -237,7 +237,7 @@ static void notrace start_secondary(void *unused)
 	load_cr3(swapper_pg_dir);
 	__flush_tlb_all();
 #endif
-
+	load_current_idt();
 	cpu_init();
 	x86_cpuinit.early_percpu_clock_init();
 	preempt_disable();
diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c
index 3321b446b66c..88754bfd425f 100644
--- a/arch/x86/mm/extable.c
+++ b/arch/x86/mm/extable.c
@@ -1,6 +1,7 @@
 #include <linux/extable.h>
 #include <linux/uaccess.h>
 #include <linux/sched/debug.h>
+#include <xen/xen.h>
 
 #include <asm/fpu/internal.h>
 #include <asm/traps.h>
@@ -212,8 +213,9 @@ void __init early_fixup_exception(struct pt_regs *regs, int trapnr)
 	 * Old CPUs leave the high bits of CS on the stack
 	 * undefined.  I'm not sure which CPUs do this, but at least
 	 * the 486 DX works this way.
+	 * Xen pv domains are not using the default __KERNEL_CS.
 	 */
-	if (regs->cs != __KERNEL_CS)
+	if (!xen_pv_domain() && regs->cs != __KERNEL_CS)
 		goto fail;
 
 	/*
diff --git a/arch/x86/pci/broadcom_bus.c b/arch/x86/pci/broadcom_bus.c
index bb461cfd01ab..526536c81ddc 100644
--- a/arch/x86/pci/broadcom_bus.c
+++ b/arch/x86/pci/broadcom_bus.c
@@ -97,7 +97,7 @@ static int __init broadcom_postcore_init(void)
 	 * We should get host bridge information from ACPI unless the BIOS
 	 * doesn't support it.
 	 */
-	if (acpi_os_get_root_pointer())
+	if (!acpi_disabled && acpi_os_get_root_pointer())
 		return 0;
 #endif
 
diff --git a/arch/x86/platform/uv/uv_nmi.c b/arch/x86/platform/uv/uv_nmi.c
index c34bd8233f7c..5f64f30873e2 100644
--- a/arch/x86/platform/uv/uv_nmi.c
+++ b/arch/x86/platform/uv/uv_nmi.c
@@ -905,7 +905,7 @@ static inline void uv_call_kgdb_kdb(int cpu, struct pt_regs *regs, int master)
 /*
  * UV NMI handler
  */
-int uv_handle_nmi(unsigned int reason, struct pt_regs *regs)
+static int uv_handle_nmi(unsigned int reason, struct pt_regs *regs)
 {
 	struct uv_hub_nmi_s *hub_nmi = uv_hub_nmi;
 	int cpu = smp_processor_id();
@@ -1013,7 +1013,7 @@ void uv_nmi_init(void)
 }
 
 /* Setup HUB NMI info */
-void __init uv_nmi_setup_common(bool hubbed)
+static void __init uv_nmi_setup_common(bool hubbed)
 {
 	int size = sizeof(void *) * (1 << NODES_SHIFT);
 	int cpu;
diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c
index 84fcfde53f8f..5191de14f4df 100644
--- a/arch/x86/power/cpu.c
+++ b/arch/x86/power/cpu.c
@@ -226,8 +226,20 @@ static void notrace __restore_processor_state(struct saved_context *ctxt)
 	load_idt((const struct desc_ptr *)&ctxt->idt_limit);
 #endif
 
+#ifdef CONFIG_X86_64
 	/*
-	 * segment registers
+	 * We need GSBASE restored before percpu access can work.
+	 * percpu access can happen in exception handlers or in complicated
+	 * helpers like load_gs_index().
+	 */
+	wrmsrl(MSR_GS_BASE, ctxt->gs_base);
+#endif
+
+	fix_processor_context();
+
+	/*
+	 * Restore segment registers.  This happens after restoring the GDT
+	 * and LDT, which happen in fix_processor_context().
 	 */
 #ifdef CONFIG_X86_32
 	loadsegment(es, ctxt->es);
@@ -248,13 +260,14 @@ static void notrace __restore_processor_state(struct saved_context *ctxt)
 	load_gs_index(ctxt->gs);
 	asm volatile ("movw %0, %%ss" :: "r" (ctxt->ss));
 
+	/*
+	 * Restore FSBASE and user GSBASE after reloading the respective
+	 * segment selectors.
+	 */
 	wrmsrl(MSR_FS_BASE, ctxt->fs_base);
-	wrmsrl(MSR_GS_BASE, ctxt->gs_base);
 	wrmsrl(MSR_KERNEL_GS_BASE, ctxt->gs_kernel_base);
 #endif
 
-	fix_processor_context();
-
 	do_fpu_end();
 	tsc_verify_tsc_adjust(true);
 	x86_platform.restore_sched_clock_state();
diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index 5b2b3f3f6531..f2414c6c5e7c 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -622,7 +622,7 @@ static struct trap_array_entry trap_array[] = {
 	{ simd_coprocessor_error,      xen_simd_coprocessor_error,      false },
 };
 
-static bool get_trap_addr(void **addr, unsigned int ist)
+static bool __ref get_trap_addr(void **addr, unsigned int ist)
 {
 	unsigned int nr;
 	bool ist_okay = false;
@@ -644,6 +644,14 @@ static bool get_trap_addr(void **addr, unsigned int ist)
 		}
 	}
 
+	if (nr == ARRAY_SIZE(trap_array) &&
+	    *addr >= (void *)early_idt_handler_array[0] &&
+	    *addr < (void *)early_idt_handler_array[NUM_EXCEPTION_VECTORS]) {
+		nr = (*addr - (void *)early_idt_handler_array[0]) /
+		     EARLY_IDT_HANDLER_SIZE;
+		*addr = (void *)xen_early_idt_handler_array[nr];
+	}
+
 	if (WARN_ON(ist != 0 && !ist_okay))
 		return false;
 
@@ -1262,6 +1270,21 @@ asmlinkage __visible void __init xen_start_kernel(void)
 	xen_setup_gdt(0);
 
 	xen_init_irq_ops();
+
+	/* Let's presume PV guests always boot on vCPU with id 0. */
+	per_cpu(xen_vcpu_id, 0) = 0;
+
+	/*
+	 * Setup xen_vcpu early because idt_setup_early_handler needs it for
+	 * local_irq_disable(), irqs_disabled().
+	 *
+	 * Don't do the full vcpu_info placement stuff until we have
+	 * the cpu_possible_mask and a non-dummy shared_info.
+	 */
+	xen_vcpu_info_reset(0);
+
+	idt_setup_early_handler();
+
 	xen_init_capabilities();
 
 #ifdef CONFIG_X86_LOCAL_APIC
@@ -1295,18 +1318,6 @@ asmlinkage __visible void __init xen_start_kernel(void)
 	 */
 	acpi_numa = -1;
 #endif
-	/* Let's presume PV guests always boot on vCPU with id 0. */
-	per_cpu(xen_vcpu_id, 0) = 0;
-
-	/*
-	 * Setup xen_vcpu early because start_kernel needs it for
-	 * local_irq_disable(), irqs_disabled().
-	 *
-	 * Don't do the full vcpu_info placement stuff until we have
-	 * the cpu_possible_mask and a non-dummy shared_info.
-	 */
-	xen_vcpu_info_reset(0);
-
 	WARN_ON(xen_cpuhp_setup(xen_cpu_up_prepare_pv, xen_cpu_dead_pv));
 
 	local_irq_disable();
diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S
index 8a10c9a9e2b5..417b339e5c8e 100644
--- a/arch/x86/xen/xen-asm_64.S
+++ b/arch/x86/xen/xen-asm_64.S
@@ -15,6 +15,7 @@
 
 #include <xen/interface/xen.h>
 
+#include <linux/init.h>
 #include <linux/linkage.h>
 
 .macro xen_pv_trap name
@@ -54,6 +55,19 @@ xen_pv_trap entry_INT80_compat
 #endif
 xen_pv_trap hypervisor_callback
 
+	__INIT
+ENTRY(xen_early_idt_handler_array)
+	i = 0
+	.rept NUM_EXCEPTION_VECTORS
+	pop %rcx
+	pop %r11
+	jmp early_idt_handler_array + i*EARLY_IDT_HANDLER_SIZE
+	i = i + 1
+	.fill xen_early_idt_handler_array + i*XEN_EARLY_IDT_HANDLER_SIZE - ., 1, 0xcc
+	.endr
+END(xen_early_idt_handler_array)
+	__FINIT
+
 hypercall_iret = hypercall_page + __HYPERVISOR_iret * 32
 /*
  * Xen64 iret frame:

^ permalink raw reply	[flat|nested] 539+ messages in thread

* [GIT PULL] x86 fixes
@ 2017-11-26 12:48 Ingo Molnar
  0 siblings, 0 replies; 539+ messages in thread
From: Ingo Molnar @ 2017-11-26 12:48 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Thomas Gleixner, H. Peter Anvin, Peter Zijlstra,
	Andrew Morton, Andy Lutomirski

Linus,

Please pull the latest x86-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus

   # HEAD: 12a78d43de767eaf8fb272facb7a7b6f2dc6a9df x86/decoder: Add new TEST instruction pattern

Misc fixes:

 - topology enumeration fixes
 - KASAN fix
 - two entry fixes (not yet the big series related to KASLR)
 - remove obsolete code
 - instruction decoder fix
 - better /dev/mem sanity checks, hopefully working better this time
 - pkeys fixes
 - two ACPI fixes
 - 5-level paging related fixes
 - UMIP fixes that should make application visible faults more debuggable
 - boot fix for weird virtualization environment

  out-of-topic modifications in x86-urgent-for-linus:
  -----------------------------------------------------
  drivers/pci/Kconfig                # fd2fa6c18b72: x86/PCI: Remove unused Hyper
  drivers/pci/Makefile               # fd2fa6c18b72: x86/PCI: Remove unused Hyper
  drivers/pci/htirq.c                # fd2fa6c18b72: x86/PCI: Remove unused Hyper
  include/linux/htirq.h              # fd2fa6c18b72: x86/PCI: Remove unused Hyper
  include/linux/pci.h                # fd2fa6c18b72: x86/PCI: Remove unused Hyper
  tools/testing/selftests/x86/5lvl.c # 97f404ad3e53: x86/selftests: Add test for 
  tools/testing/selftests/x86/Makefile# 97f404ad3e53: x86/selftests: Add test for 
  tools/testing/selftests/x86/mpx-hw.h# a6400120d042: x86/mpx/selftests: Fix up we
  tools/testing/selftests/x86/pkey-helpers.h# 7b659ee3e1fe: x86/pkeys/selftests: Fix pro
  tools/testing/selftests/x86/protection_keys.c# 91c49c2deb96: x86/pkeys/selftests: Rename 

 Thanks,

	Ingo

------------------>
Andi Kleen (2):
      perf/x86/intel/uncore: Cache logical pkg id in uncore driver
      x86/topology: Avoid wasting 128k for package id array

Andrey Ryabinin (1):
      x86/mm/kasan: Don't use vmemmap_populate() to initialize shadow

Andy Lutomirski (2):
      x86/entry/64: Fix entry_SYSCALL_64_after_hwframe() IRQ tracing
      x86/entry/64: Add missing irqflags tracing to native_load_gs_index()

Bjorn Helgaas (1):
      x86/PCI: Remove unused HyperTransport interrupt support

Borislav Petkov (1):
      x86/umip: Fix insn_get_code_seg_params()'s return value

Chao Fan (1):
      x86/boot/KASLR: Remove unused variable

Craig Bergstrom (1):
      x86/mm: Limit mmap() of /dev/mem to valid physical addresses

Dave Hansen (4):
      x86/pkeys: Update documentation about availability
      x86/mpx/selftests: Fix up weird arrays
      x86/pkeys/selftests: Rename 'si_pkey' to 'siginfo_pkey'
      x86/pkeys/selftests: Fix protection keys write() warning

Kirill A. Shutemov (2):
      x86/mm: Prevent non-MAP_FIXED mapping across DEFAULT_MAP_WINDOW border
      x86/selftests: Add test for mapping placement for 5-level paging

Masami Hiramatsu (1):
      x86/decoder: Add new TEST instruction pattern

Prarit Bhargava (1):
      x86/smpboot: Fix __max_logical_packages estimate

Ricardo Neri (4):
      x86/umip: Select X86_INTEL_UMIP by default
      x86/umip: Print a line in the boot log that UMIP has been enabled
      x86/umip: Identify the STR and SLDT instructions
      x86/umip: Print a warning into the syslog if UMIP-protected instructions are used

Tom Lendacky (1):
      x86/boot: Fix boot failure when SMP MP-table is based at 0

Vikas C Sajjan (2):
      x86/acpi: Handle SCI interrupts above legacy space gracefully
      x86/acpi: Reduce code duplication in mp_override_legacy_irq()


 Documentation/x86/protection-keys.txt         |   9 +-
 arch/x86/Kconfig                              |  14 +-
 arch/x86/boot/compressed/kaslr.c              |   5 +-
 arch/x86/entry/entry_64.S                     |  14 +-
 arch/x86/events/intel/uncore.c                |   4 +-
 arch/x86/events/intel/uncore.h                |   2 +-
 arch/x86/events/intel/uncore_snbep.c          |   2 +-
 arch/x86/include/asm/elf.h                    |   1 +
 arch/x86/include/asm/hw_irq.h                 |   8 --
 arch/x86/include/asm/hypertransport.h         |  46 ------
 arch/x86/include/asm/insn-eval.h              |   2 +-
 arch/x86/include/asm/io.h                     |   4 +
 arch/x86/include/asm/irqdomain.h              |   6 -
 arch/x86/include/asm/processor.h              |   1 +
 arch/x86/kernel/acpi/boot.c                   |  61 +++++---
 arch/x86/kernel/apic/Makefile                 |   1 -
 arch/x86/kernel/apic/htirq.c                  | 198 --------------------------
 arch/x86/kernel/apic/vector.c                 |   5 +-
 arch/x86/kernel/cpu/common.c                  |   2 +
 arch/x86/kernel/mpparse.c                     |   6 +-
 arch/x86/kernel/smpboot.c                     | 128 +++++------------
 arch/x86/kernel/sys_x86_64.c                  |  10 +-
 arch/x86/kernel/umip.c                        |  88 ++++++++++--
 arch/x86/lib/insn-eval.c                      |   4 +-
 arch/x86/lib/x86-opcode-map.txt               |   2 +-
 arch/x86/mm/hugetlbpage.c                     |  11 +-
 arch/x86/mm/kasan_init_64.c                   | 143 ++++++++++++++++++-
 arch/x86/mm/mmap.c                            |  62 ++++++++
 drivers/char/mem.c                            |   4 +
 drivers/pci/Kconfig                           |   9 --
 drivers/pci/Makefile                          |   3 -
 drivers/pci/htirq.c                           | 135 ------------------
 include/linux/htirq.h                         |  39 -----
 include/linux/pci.h                           |   6 -
 tools/testing/selftests/x86/5lvl.c            | 177 +++++++++++++++++++++++
 tools/testing/selftests/x86/Makefile          |   2 +-
 tools/testing/selftests/x86/mpx-hw.h          |   4 +-
 tools/testing/selftests/x86/pkey-helpers.h    |   5 +-
 tools/testing/selftests/x86/protection_keys.c |  10 +-
 39 files changed, 609 insertions(+), 624 deletions(-)
 delete mode 100644 arch/x86/include/asm/hypertransport.h
 delete mode 100644 arch/x86/kernel/apic/htirq.c
 delete mode 100644 drivers/pci/htirq.c
 delete mode 100644 include/linux/htirq.h
 create mode 100644 tools/testing/selftests/x86/5lvl.c

diff --git a/Documentation/x86/protection-keys.txt b/Documentation/x86/protection-keys.txt
index fa46dcb347bc..ecb0d2dadfb7 100644
--- a/Documentation/x86/protection-keys.txt
+++ b/Documentation/x86/protection-keys.txt
@@ -1,5 +1,10 @@
-Memory Protection Keys for Userspace (PKU aka PKEYs) is a CPU feature
-which will be found on future Intel CPUs.
+Memory Protection Keys for Userspace (PKU aka PKEYs) is a feature
+which is found on Intel's Skylake "Scalable Processor" Server CPUs.
+It will be avalable in future non-server parts.
+
+For anyone wishing to test or use this feature, it is available in
+Amazon's EC2 C5 instances and is known to work there using an Ubuntu
+17.04 image.
 
 Memory Protection Keys provides a mechanism for enforcing page-based
 protections, but without requiring modification of the page tables
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index f08977d82ca0..09dcc94c4484 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -110,7 +110,7 @@ config X86
 	select HAVE_ARCH_AUDITSYSCALL
 	select HAVE_ARCH_HUGE_VMAP		if X86_64 || X86_PAE
 	select HAVE_ARCH_JUMP_LABEL
-	select HAVE_ARCH_KASAN			if X86_64 && SPARSEMEM_VMEMMAP
+	select HAVE_ARCH_KASAN			if X86_64
 	select HAVE_ARCH_KGDB
 	select HAVE_ARCH_KMEMCHECK
 	select HAVE_ARCH_MMAP_RND_BITS		if MMU
@@ -1805,14 +1805,20 @@ config X86_SMAP
 	  If unsure, say Y.
 
 config X86_INTEL_UMIP
-	def_bool n
+	def_bool y
 	depends on CPU_SUP_INTEL
 	prompt "Intel User Mode Instruction Prevention" if EXPERT
 	---help---
 	  The User Mode Instruction Prevention (UMIP) is a security
 	  feature in newer Intel processors. If enabled, a general
-	  protection fault is issued if the instructions SGDT, SLDT,
-	  SIDT, SMSW and STR are executed in user mode.
+	  protection fault is issued if the SGDT, SLDT, SIDT, SMSW
+	  or STR instructions are executed in user mode. These instructions
+	  unnecessarily expose information about the hardware state.
+
+	  The vast majority of applications do not use these instructions.
+	  For the very few that do, software emulation is provided in
+	  specific cases in protected and virtual-8086 modes. Emulated
+	  results are dummy.
 
 config X86_INTEL_MPX
 	prompt "Intel MPX (Memory Protection Extensions)"
diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c
index a63fbc25ce84..8199a6187251 100644
--- a/arch/x86/boot/compressed/kaslr.c
+++ b/arch/x86/boot/compressed/kaslr.c
@@ -171,7 +171,6 @@ parse_memmap(char *p, unsigned long long *start, unsigned long long *size)
 static void mem_avoid_memmap(char *str)
 {
 	static int i;
-	int rc;
 
 	if (i >= MAX_MEMMAP_REGIONS)
 		return;
@@ -219,7 +218,7 @@ static int handle_mem_memmap(void)
 		return 0;
 
 	tmp_cmdline = malloc(len + 1);
-	if (!tmp_cmdline )
+	if (!tmp_cmdline)
 		error("Failed to allocate space for tmp_cmdline");
 
 	memcpy(tmp_cmdline, args, len);
@@ -363,7 +362,7 @@ static void mem_avoid_init(unsigned long input, unsigned long input_size,
 	cmd_line |= boot_params->hdr.cmd_line_ptr;
 	/* Calculate size of cmd_line. */
 	ptr = (char *)(unsigned long)cmd_line;
-	for (cmd_line_size = 0; ptr[cmd_line_size++]; )
+	for (cmd_line_size = 0; ptr[cmd_line_size++];)
 		;
 	mem_avoid[MEM_AVOID_CMDLINE].start = cmd_line;
 	mem_avoid[MEM_AVOID_CMDLINE].size = cmd_line_size;
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index a2b30ec69497..f81d50d7ceac 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -51,15 +51,19 @@ ENTRY(native_usergs_sysret64)
 END(native_usergs_sysret64)
 #endif /* CONFIG_PARAVIRT */
 
-.macro TRACE_IRQS_IRETQ
+.macro TRACE_IRQS_FLAGS flags:req
 #ifdef CONFIG_TRACE_IRQFLAGS
-	bt	$9, EFLAGS(%rsp)		/* interrupts off? */
+	bt	$9, \flags		/* interrupts off? */
 	jnc	1f
 	TRACE_IRQS_ON
 1:
 #endif
 .endm
 
+.macro TRACE_IRQS_IRETQ
+	TRACE_IRQS_FLAGS EFLAGS(%rsp)
+.endm
+
 /*
  * When dynamic function tracer is enabled it will add a breakpoint
  * to all locations that it is about to modify, sync CPUs, update
@@ -148,8 +152,6 @@ ENTRY(entry_SYSCALL_64)
 	movq	%rsp, PER_CPU_VAR(rsp_scratch)
 	movq	PER_CPU_VAR(cpu_current_top_of_stack), %rsp
 
-	TRACE_IRQS_OFF
-
 	/* Construct struct pt_regs on stack */
 	pushq	$__USER_DS			/* pt_regs->ss */
 	pushq	PER_CPU_VAR(rsp_scratch)	/* pt_regs->sp */
@@ -170,6 +172,8 @@ GLOBAL(entry_SYSCALL_64_after_hwframe)
 	sub	$(6*8), %rsp			/* pt_regs->bp, bx, r12-15 not saved */
 	UNWIND_HINT_REGS extra=0
 
+	TRACE_IRQS_OFF
+
 	/*
 	 * If we need to do entry work or if we guess we'll need to do
 	 * exit work, go straight to the slow path.
@@ -943,11 +947,13 @@ ENTRY(native_load_gs_index)
 	FRAME_BEGIN
 	pushfq
 	DISABLE_INTERRUPTS(CLBR_ANY & ~CLBR_RDI)
+	TRACE_IRQS_OFF
 	SWAPGS
 .Lgs_change:
 	movl	%edi, %gs
 2:	ALTERNATIVE "", "mfence", X86_BUG_SWAPGS_FENCE
 	SWAPGS
+	TRACE_IRQS_FLAGS (%rsp)
 	popfq
 	FRAME_END
 	ret
diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c
index d45e06346f14..7874c980d569 100644
--- a/arch/x86/events/intel/uncore.c
+++ b/arch/x86/events/intel/uncore.c
@@ -975,10 +975,10 @@ static void uncore_pci_remove(struct pci_dev *pdev)
 	int i, phys_id, pkg;
 
 	phys_id = uncore_pcibus_to_physid(pdev->bus);
-	pkg = topology_phys_to_logical_pkg(phys_id);
 
 	box = pci_get_drvdata(pdev);
 	if (!box) {
+		pkg = topology_phys_to_logical_pkg(phys_id);
 		for (i = 0; i < UNCORE_EXTRA_PCI_DEV_MAX; i++) {
 			if (uncore_extra_pci_dev[pkg].dev[i] == pdev) {
 				uncore_extra_pci_dev[pkg].dev[i] = NULL;
@@ -994,7 +994,7 @@ static void uncore_pci_remove(struct pci_dev *pdev)
 		return;
 
 	pci_set_drvdata(pdev, NULL);
-	pmu->boxes[pkg] = NULL;
+	pmu->boxes[box->pkgid] = NULL;
 	if (atomic_dec_return(&pmu->activeboxes) == 0)
 		uncore_pmu_unregister(pmu);
 	uncore_box_exit(box);
diff --git a/arch/x86/events/intel/uncore.h b/arch/x86/events/intel/uncore.h
index 4364191e7c6b..414dc7e7c950 100644
--- a/arch/x86/events/intel/uncore.h
+++ b/arch/x86/events/intel/uncore.h
@@ -100,7 +100,7 @@ struct intel_uncore_extra_reg {
 
 struct intel_uncore_box {
 	int pci_phys_id;
-	int pkgid;
+	int pkgid;	/* Logical package ID */
 	int n_active;	/* number of active events */
 	int n_events;
 	int cpu;	/* cpu to collect events */
diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c
index 95cb19f4e06f..de8f8625213c 100644
--- a/arch/x86/events/intel/uncore_snbep.c
+++ b/arch/x86/events/intel/uncore_snbep.c
@@ -1057,7 +1057,7 @@ static void snbep_qpi_enable_event(struct intel_uncore_box *box, struct perf_eve
 
 	if (reg1->idx != EXTRA_REG_NONE) {
 		int idx = box->pmu->pmu_idx + SNBEP_PCI_QPI_PORT0_FILTER;
-		int pkg = topology_phys_to_logical_pkg(box->pci_phys_id);
+		int pkg = box->pkgid;
 		struct pci_dev *filter_pdev = uncore_extra_pci_dev[pkg].dev[idx];
 
 		if (filter_pdev) {
diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h
index 3a091cea36c5..0d157d2a1e2a 100644
--- a/arch/x86/include/asm/elf.h
+++ b/arch/x86/include/asm/elf.h
@@ -309,6 +309,7 @@ static inline int mmap_is_ia32(void)
 extern unsigned long task_size_32bit(void);
 extern unsigned long task_size_64bit(int full_addr_space);
 extern unsigned long get_mmap_base(int is_legacy);
+extern bool mmap_address_hint_valid(unsigned long addr, unsigned long len);
 
 #ifdef CONFIG_X86_32
 
diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
index b80e46733909..2851077b6051 100644
--- a/arch/x86/include/asm/hw_irq.h
+++ b/arch/x86/include/asm/hw_irq.h
@@ -99,14 +99,6 @@ struct irq_alloc_info {
 			void		*dmar_data;
 		};
 #endif
-#ifdef	CONFIG_HT_IRQ
-		struct {
-			int		ht_pos;
-			int		ht_idx;
-			struct pci_dev	*ht_dev;
-			void		*ht_update;
-		};
-#endif
 #ifdef	CONFIG_X86_UV
 		struct {
 			int		uv_limit;
diff --git a/arch/x86/include/asm/hypertransport.h b/arch/x86/include/asm/hypertransport.h
deleted file mode 100644
index 5d55df352879..000000000000
--- a/arch/x86/include/asm/hypertransport.h
+++ /dev/null
@@ -1,46 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_X86_HYPERTRANSPORT_H
-#define _ASM_X86_HYPERTRANSPORT_H
-
-/*
- * Constants for x86 Hypertransport Interrupts.
- */
-
-#define HT_IRQ_LOW_BASE			0xf8000000
-
-#define HT_IRQ_LOW_VECTOR_SHIFT		16
-#define HT_IRQ_LOW_VECTOR_MASK		0x00ff0000
-#define HT_IRQ_LOW_VECTOR(v)						\
-	(((v) << HT_IRQ_LOW_VECTOR_SHIFT) & HT_IRQ_LOW_VECTOR_MASK)
-
-#define HT_IRQ_LOW_DEST_ID_SHIFT	8
-#define HT_IRQ_LOW_DEST_ID_MASK		0x0000ff00
-#define HT_IRQ_LOW_DEST_ID(v)						\
-	(((v) << HT_IRQ_LOW_DEST_ID_SHIFT) & HT_IRQ_LOW_DEST_ID_MASK)
-
-#define HT_IRQ_LOW_DM_PHYSICAL		0x0000000
-#define HT_IRQ_LOW_DM_LOGICAL		0x0000040
-
-#define HT_IRQ_LOW_RQEOI_EDGE		0x0000000
-#define HT_IRQ_LOW_RQEOI_LEVEL		0x0000020
-
-
-#define HT_IRQ_LOW_MT_FIXED		0x0000000
-#define HT_IRQ_LOW_MT_ARBITRATED	0x0000004
-#define HT_IRQ_LOW_MT_SMI		0x0000008
-#define HT_IRQ_LOW_MT_NMI		0x000000c
-#define HT_IRQ_LOW_MT_INIT		0x0000010
-#define HT_IRQ_LOW_MT_STARTUP		0x0000014
-#define HT_IRQ_LOW_MT_EXTINT		0x0000018
-#define HT_IRQ_LOW_MT_LINT1		0x000008c
-#define HT_IRQ_LOW_MT_LINT0		0x0000098
-
-#define HT_IRQ_LOW_IRQ_MASKED		0x0000001
-
-
-#define HT_IRQ_HIGH_DEST_ID_SHIFT	0
-#define HT_IRQ_HIGH_DEST_ID_MASK	0x00ffffff
-#define HT_IRQ_HIGH_DEST_ID(v)						\
-	((((v) >> 8) << HT_IRQ_HIGH_DEST_ID_SHIFT) & HT_IRQ_HIGH_DEST_ID_MASK)
-
-#endif /* _ASM_X86_HYPERTRANSPORT_H */
diff --git a/arch/x86/include/asm/insn-eval.h b/arch/x86/include/asm/insn-eval.h
index e1d3b4ce8a92..2b6ccf2c49f1 100644
--- a/arch/x86/include/asm/insn-eval.h
+++ b/arch/x86/include/asm/insn-eval.h
@@ -18,6 +18,6 @@
 void __user *insn_get_addr_ref(struct insn *insn, struct pt_regs *regs);
 int insn_get_modrm_rm_off(struct insn *insn, struct pt_regs *regs);
 unsigned long insn_get_seg_base(struct pt_regs *regs, int seg_reg_idx);
-char insn_get_code_seg_params(struct pt_regs *regs);
+int insn_get_code_seg_params(struct pt_regs *regs);
 
 #endif /* _ASM_X86_INSN_EVAL_H */
diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h
index 93ae8aee1780..95e948627fd0 100644
--- a/arch/x86/include/asm/io.h
+++ b/arch/x86/include/asm/io.h
@@ -111,6 +111,10 @@ build_mmio_write(__writeq, "q", unsigned long, "r", )
 
 #endif
 
+#define ARCH_HAS_VALID_PHYS_ADDR_RANGE
+extern int valid_phys_addr_range(phys_addr_t addr, size_t size);
+extern int valid_mmap_phys_addr_range(unsigned long pfn, size_t size);
+
 /**
  *	virt_to_phys	-	map virtual addresses to physical
  *	@address: address to remap
diff --git a/arch/x86/include/asm/irqdomain.h b/arch/x86/include/asm/irqdomain.h
index f695cc6b8e1f..139feef467f7 100644
--- a/arch/x86/include/asm/irqdomain.h
+++ b/arch/x86/include/asm/irqdomain.h
@@ -56,10 +56,4 @@ extern void arch_init_msi_domain(struct irq_domain *domain);
 static inline void arch_init_msi_domain(struct irq_domain *domain) { }
 #endif
 
-#ifdef CONFIG_HT_IRQ
-extern void arch_init_htirq_domain(struct irq_domain *domain);
-#else
-static inline void arch_init_htirq_domain(struct irq_domain *domain) { }
-#endif
-
 #endif
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 2db7cf720b04..cc16fa882e3e 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -132,6 +132,7 @@ struct cpuinfo_x86 {
 	/* Index into per_cpu list: */
 	u16			cpu_index;
 	u32			microcode;
+	unsigned		initialized : 1;
 } __randomize_layout;
 
 struct cpuid_regs {
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index ef9e02e614d0..f4c463df8b08 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -342,13 +342,12 @@ acpi_parse_lapic_nmi(struct acpi_subtable_header * header, const unsigned long e
 #ifdef CONFIG_X86_IO_APIC
 #define MP_ISA_BUS		0
 
+static int __init mp_register_ioapic_irq(u8 bus_irq, u8 polarity,
+						u8 trigger, u32 gsi);
+
 static void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger,
 					  u32 gsi)
 {
-	int ioapic;
-	int pin;
-	struct mpc_intsrc mp_irq;
-
 	/*
 	 * Check bus_irq boundary.
 	 */
@@ -357,14 +356,6 @@ static void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger,
 		return;
 	}
 
-	/*
-	 * Convert 'gsi' to 'ioapic.pin'.
-	 */
-	ioapic = mp_find_ioapic(gsi);
-	if (ioapic < 0)
-		return;
-	pin = mp_find_ioapic_pin(ioapic, gsi);
-
 	/*
 	 * TBD: This check is for faulty timer entries, where the override
 	 *      erroneously sets the trigger to level, resulting in a HUGE
@@ -373,16 +364,8 @@ static void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger,
 	if ((bus_irq == 0) && (trigger == 3))
 		trigger = 1;
 
-	mp_irq.type = MP_INTSRC;
-	mp_irq.irqtype = mp_INT;
-	mp_irq.irqflag = (trigger << 2) | polarity;
-	mp_irq.srcbus = MP_ISA_BUS;
-	mp_irq.srcbusirq = bus_irq;	/* IRQ */
-	mp_irq.dstapic = mpc_ioapic_id(ioapic); /* APIC ID */
-	mp_irq.dstirq = pin;	/* INTIN# */
-
-	mp_save_irq(&mp_irq);
-
+	if (mp_register_ioapic_irq(bus_irq, polarity, trigger, gsi) < 0)
+		return;
 	/*
 	 * Reset default identity mapping if gsi is also an legacy IRQ,
 	 * otherwise there will be more than one entry with the same GSI
@@ -429,6 +412,34 @@ static int mp_config_acpi_gsi(struct device *dev, u32 gsi, int trigger,
 	return 0;
 }
 
+static int __init mp_register_ioapic_irq(u8 bus_irq, u8 polarity,
+						u8 trigger, u32 gsi)
+{
+	struct mpc_intsrc mp_irq;
+	int ioapic, pin;
+
+	/* Convert 'gsi' to 'ioapic.pin'(INTIN#) */
+	ioapic = mp_find_ioapic(gsi);
+	if (ioapic < 0) {
+		pr_warn("Failed to find ioapic for gsi : %u\n", gsi);
+		return ioapic;
+	}
+
+	pin = mp_find_ioapic_pin(ioapic, gsi);
+
+	mp_irq.type = MP_INTSRC;
+	mp_irq.irqtype = mp_INT;
+	mp_irq.irqflag = (trigger << 2) | polarity;
+	mp_irq.srcbus = MP_ISA_BUS;
+	mp_irq.srcbusirq = bus_irq;
+	mp_irq.dstapic = mpc_ioapic_id(ioapic);
+	mp_irq.dstirq = pin;
+
+	mp_save_irq(&mp_irq);
+
+	return 0;
+}
+
 static int __init
 acpi_parse_ioapic(struct acpi_subtable_header * header, const unsigned long end)
 {
@@ -473,7 +484,11 @@ static void __init acpi_sci_ioapic_setup(u8 bus_irq, u16 polarity, u16 trigger,
 	if (acpi_sci_flags & ACPI_MADT_POLARITY_MASK)
 		polarity = acpi_sci_flags & ACPI_MADT_POLARITY_MASK;
 
-	mp_override_legacy_irq(bus_irq, polarity, trigger, gsi);
+	if (bus_irq < NR_IRQS_LEGACY)
+		mp_override_legacy_irq(bus_irq, polarity, trigger, gsi);
+	else
+		mp_register_ioapic_irq(bus_irq, polarity, trigger, gsi);
+
 	acpi_penalize_sci_irq(bus_irq, trigger, polarity);
 
 	/*
diff --git a/arch/x86/kernel/apic/Makefile b/arch/x86/kernel/apic/Makefile
index a9e08924927e..a6fcaf16cdbf 100644
--- a/arch/x86/kernel/apic/Makefile
+++ b/arch/x86/kernel/apic/Makefile
@@ -12,7 +12,6 @@ obj-y				+= hw_nmi.o
 
 obj-$(CONFIG_X86_IO_APIC)	+= io_apic.o
 obj-$(CONFIG_PCI_MSI)		+= msi.o
-obj-$(CONFIG_HT_IRQ)		+= htirq.o
 obj-$(CONFIG_SMP)		+= ipi.o
 
 ifeq ($(CONFIG_X86_64),y)
diff --git a/arch/x86/kernel/apic/htirq.c b/arch/x86/kernel/apic/htirq.c
deleted file mode 100644
index b07075dce8b7..000000000000
--- a/arch/x86/kernel/apic/htirq.c
+++ /dev/null
@@ -1,198 +0,0 @@
-/*
- * Support Hypertransport IRQ
- *
- * Copyright (C) 1997, 1998, 1999, 2000, 2009 Ingo Molnar, Hajnalka Szabo
- *	Moved from arch/x86/kernel/apic/io_apic.c.
- * Jiang Liu <jiang.liu@linux.intel.com>
- *	Add support of hierarchical irqdomain
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-#include <linux/mm.h>
-#include <linux/interrupt.h>
-#include <linux/init.h>
-#include <linux/device.h>
-#include <linux/pci.h>
-#include <linux/htirq.h>
-#include <asm/irqdomain.h>
-#include <asm/hw_irq.h>
-#include <asm/apic.h>
-#include <asm/hypertransport.h>
-
-static struct irq_domain *htirq_domain;
-
-/*
- * Hypertransport interrupt support
- */
-static int
-ht_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force)
-{
-	struct irq_data *parent = data->parent_data;
-	int ret;
-
-	ret = parent->chip->irq_set_affinity(parent, mask, force);
-	if (ret >= 0) {
-		struct ht_irq_msg msg;
-		struct irq_cfg *cfg = irqd_cfg(data);
-
-		fetch_ht_irq_msg(data->irq, &msg);
-		msg.address_lo &= ~(HT_IRQ_LOW_VECTOR_MASK |
-				    HT_IRQ_LOW_DEST_ID_MASK);
-		msg.address_lo |= HT_IRQ_LOW_VECTOR(cfg->vector) |
-				  HT_IRQ_LOW_DEST_ID(cfg->dest_apicid);
-		msg.address_hi &= ~(HT_IRQ_HIGH_DEST_ID_MASK);
-		msg.address_hi |= HT_IRQ_HIGH_DEST_ID(cfg->dest_apicid);
-		write_ht_irq_msg(data->irq, &msg);
-	}
-
-	return ret;
-}
-
-static struct irq_chip ht_irq_chip = {
-	.name			= "PCI-HT",
-	.irq_mask		= mask_ht_irq,
-	.irq_unmask		= unmask_ht_irq,
-	.irq_ack		= irq_chip_ack_parent,
-	.irq_set_affinity	= ht_set_affinity,
-	.irq_retrigger		= irq_chip_retrigger_hierarchy,
-	.flags			= IRQCHIP_SKIP_SET_WAKE,
-};
-
-static int htirq_domain_alloc(struct irq_domain *domain, unsigned int virq,
-			      unsigned int nr_irqs, void *arg)
-{
-	struct ht_irq_cfg *ht_cfg;
-	struct irq_alloc_info *info = arg;
-	struct pci_dev *dev;
-	irq_hw_number_t hwirq;
-	int ret;
-
-	if (nr_irqs > 1 || !info)
-		return -EINVAL;
-
-	dev = info->ht_dev;
-	hwirq = (info->ht_idx & 0xFF) |
-		PCI_DEVID(dev->bus->number, dev->devfn) << 8 |
-		(pci_domain_nr(dev->bus) & 0xFFFFFFFF) << 24;
-	if (irq_find_mapping(domain, hwirq) > 0)
-		return -EEXIST;
-
-	ht_cfg = kmalloc(sizeof(*ht_cfg), GFP_KERNEL);
-	if (!ht_cfg)
-		return -ENOMEM;
-
-	ret = irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, info);
-	if (ret < 0) {
-		kfree(ht_cfg);
-		return ret;
-	}
-
-	/* Initialize msg to a value that will never match the first write. */
-	ht_cfg->msg.address_lo = 0xffffffff;
-	ht_cfg->msg.address_hi = 0xffffffff;
-	ht_cfg->dev = info->ht_dev;
-	ht_cfg->update = info->ht_update;
-	ht_cfg->pos = info->ht_pos;
-	ht_cfg->idx = 0x10 + (info->ht_idx * 2);
-	irq_domain_set_info(domain, virq, hwirq, &ht_irq_chip, ht_cfg,
-			    handle_edge_irq, ht_cfg, "edge");
-
-	return 0;
-}
-
-static void htirq_domain_free(struct irq_domain *domain, unsigned int virq,
-			      unsigned int nr_irqs)
-{
-	struct irq_data *irq_data = irq_domain_get_irq_data(domain, virq);
-
-	BUG_ON(nr_irqs != 1);
-	kfree(irq_data->chip_data);
-	irq_domain_free_irqs_top(domain, virq, nr_irqs);
-}
-
-static int htirq_domain_activate(struct irq_domain *domain,
-				 struct irq_data *irq_data, bool early)
-{
-	struct ht_irq_msg msg;
-	struct irq_cfg *cfg = irqd_cfg(irq_data);
-
-	msg.address_hi = HT_IRQ_HIGH_DEST_ID(cfg->dest_apicid);
-	msg.address_lo =
-		HT_IRQ_LOW_BASE |
-		HT_IRQ_LOW_DEST_ID(cfg->dest_apicid) |
-		HT_IRQ_LOW_VECTOR(cfg->vector) |
-		((apic->irq_dest_mode == 0) ?
-			HT_IRQ_LOW_DM_PHYSICAL :
-			HT_IRQ_LOW_DM_LOGICAL) |
-		HT_IRQ_LOW_RQEOI_EDGE |
-		((apic->irq_delivery_mode != dest_LowestPrio) ?
-			HT_IRQ_LOW_MT_FIXED :
-			HT_IRQ_LOW_MT_ARBITRATED) |
-		HT_IRQ_LOW_IRQ_MASKED;
-	write_ht_irq_msg(irq_data->irq, &msg);
-	return 0;
-}
-
-static void htirq_domain_deactivate(struct irq_domain *domain,
-				    struct irq_data *irq_data)
-{
-	struct ht_irq_msg msg;
-
-	memset(&msg, 0, sizeof(msg));
-	write_ht_irq_msg(irq_data->irq, &msg);
-}
-
-static const struct irq_domain_ops htirq_domain_ops = {
-	.alloc		= htirq_domain_alloc,
-	.free		= htirq_domain_free,
-	.activate	= htirq_domain_activate,
-	.deactivate	= htirq_domain_deactivate,
-};
-
-void __init arch_init_htirq_domain(struct irq_domain *parent)
-{
-	struct fwnode_handle *fn;
-
-	if (disable_apic)
-		return;
-
-	fn = irq_domain_alloc_named_fwnode("PCI-HT");
-	if (!fn)
-		goto warn;
-
-	htirq_domain = irq_domain_create_tree(fn, &htirq_domain_ops, NULL);
-	irq_domain_free_fwnode(fn);
-	if (!htirq_domain)
-		goto warn;
-
-	htirq_domain->parent = parent;
-	return;
-
-warn:
-	pr_warn("Failed to initialize irqdomain for HTIRQ.\n");
-}
-
-int arch_setup_ht_irq(int idx, int pos, struct pci_dev *dev,
-		      ht_irq_update_t *update)
-{
-	struct irq_alloc_info info;
-
-	if (!htirq_domain)
-		return -ENOSYS;
-
-	init_irq_alloc_info(&info, NULL);
-	info.ht_idx = idx;
-	info.ht_pos = pos;
-	info.ht_dev = dev;
-	info.ht_update = update;
-
-	return irq_domain_alloc_irqs(htirq_domain, 1, dev_to_node(&dev->dev),
-				     &info);
-}
-
-void arch_teardown_ht_irq(unsigned int irq)
-{
-	irq_domain_free_irqs(irq, 1);
-}
diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index 05c85e693a5d..6a823a25eaff 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -1,5 +1,5 @@
 /*
- * Local APIC related interfaces to support IOAPIC, MSI, HT_IRQ etc.
+ * Local APIC related interfaces to support IOAPIC, MSI, etc.
  *
  * Copyright (C) 1997, 1998, 1999, 2000, 2009 Ingo Molnar, Hajnalka Szabo
  *	Moved from arch/x86/kernel/apic/io_apic.c.
@@ -601,7 +601,7 @@ int __init arch_probe_nr_irqs(void)
 		nr_irqs = NR_VECTORS * nr_cpu_ids;
 
 	nr = (gsi_top + nr_legacy_irqs()) + 8 * nr_cpu_ids;
-#if defined(CONFIG_PCI_MSI) || defined(CONFIG_HT_IRQ)
+#if defined(CONFIG_PCI_MSI)
 	/*
 	 * for MSI and HT dyn irq
 	 */
@@ -663,7 +663,6 @@ int __init arch_early_irq_init(void)
 	irq_set_default_host(x86_vector_domain);
 
 	arch_init_msi_domain(x86_vector_domain);
-	arch_init_htirq_domain(x86_vector_domain);
 
 	BUG_ON(!alloc_cpumask_var(&vector_searchmask, GFP_KERNEL));
 
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 13ae9e5eec2f..fa998ca8aa5a 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -341,6 +341,8 @@ static __always_inline void setup_umip(struct cpuinfo_x86 *c)
 
 	cr4_set_bits(X86_CR4_UMIP);
 
+	pr_info("x86/cpu: Activated the Intel User Mode Instruction Prevention (UMIP) CPU feature\n");
+
 	return;
 
 out:
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index 410c5dadcee3..3a4b12809ab5 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -431,6 +431,7 @@ static inline void __init construct_default_ISA_mptable(int mpc_default_type)
 }
 
 static unsigned long mpf_base;
+static bool mpf_found;
 
 static unsigned long __init get_mpc_size(unsigned long physptr)
 {
@@ -504,7 +505,7 @@ void __init default_get_smp_config(unsigned int early)
 	if (!smp_found_config)
 		return;
 
-	if (!mpf_base)
+	if (!mpf_found)
 		return;
 
 	if (acpi_lapic && early)
@@ -593,6 +594,7 @@ static int __init smp_scan_config(unsigned long base, unsigned long length)
 			smp_found_config = 1;
 #endif
 			mpf_base = base;
+			mpf_found = true;
 
 			pr_info("found SMP MP-table at [mem %#010lx-%#010lx] mapped at [%p]\n",
 				base, base + sizeof(*mpf) - 1, mpf);
@@ -858,7 +860,7 @@ static int __init update_mp_table(void)
 	if (!enable_update_mptable)
 		return 0;
 
-	if (!mpf_base)
+	if (!mpf_found)
 		return 0;
 
 	mpf = early_memremap(mpf_base, sizeof(*mpf));
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 5f59e6bee123..3d01df7d7cf6 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -101,9 +101,6 @@ DEFINE_PER_CPU_READ_MOSTLY(struct cpuinfo_x86, cpu_info);
 EXPORT_PER_CPU_SYMBOL(cpu_info);
 
 /* Logical package management. We might want to allocate that dynamically */
-static int *physical_to_logical_pkg __read_mostly;
-static unsigned long *physical_package_map __read_mostly;;
-static unsigned int max_physical_pkg_id __read_mostly;
 unsigned int __max_logical_packages __read_mostly;
 EXPORT_SYMBOL(__max_logical_packages);
 static unsigned int logical_packages __read_mostly;
@@ -280,6 +277,25 @@ static void notrace start_secondary(void *unused)
 	cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
 }
 
+/**
+ * topology_phys_to_logical_pkg - Map a physical package id to a logical
+ *
+ * Returns logical package id or -1 if not found
+ */
+int topology_phys_to_logical_pkg(unsigned int phys_pkg)
+{
+	int cpu;
+
+	for_each_possible_cpu(cpu) {
+		struct cpuinfo_x86 *c = &cpu_data(cpu);
+
+		if (c->initialized && c->phys_proc_id == phys_pkg)
+			return c->logical_proc_id;
+	}
+	return -1;
+}
+EXPORT_SYMBOL(topology_phys_to_logical_pkg);
+
 /**
  * topology_update_package_map - Update the physical to logical package map
  * @pkg:	The physical package id as retrieved via CPUID
@@ -287,102 +303,23 @@ static void notrace start_secondary(void *unused)
  */
 int topology_update_package_map(unsigned int pkg, unsigned int cpu)
 {
-	unsigned int new;
+	int new;
 
-	/* Called from early boot ? */
-	if (!physical_package_map)
-		return 0;
-
-	if (pkg >= max_physical_pkg_id)
-		return -EINVAL;
-
-	/* Set the logical package id */
-	if (test_and_set_bit(pkg, physical_package_map))
+	/* Already available somewhere? */
+	new = topology_phys_to_logical_pkg(pkg);
+	if (new >= 0)
 		goto found;
 
-	if (logical_packages >= __max_logical_packages) {
-		pr_warn("Package %u of CPU %u exceeds BIOS package data %u.\n",
-			logical_packages, cpu, __max_logical_packages);
-		return -ENOSPC;
-	}
-
 	new = logical_packages++;
 	if (new != pkg) {
 		pr_info("CPU %u Converting physical %u to logical package %u\n",
 			cpu, pkg, new);
 	}
-	physical_to_logical_pkg[pkg] = new;
-
 found:
-	cpu_data(cpu).logical_proc_id = physical_to_logical_pkg[pkg];
+	cpu_data(cpu).logical_proc_id = new;
 	return 0;
 }
 
-/**
- * topology_phys_to_logical_pkg - Map a physical package id to a logical
- *
- * Returns logical package id or -1 if not found
- */
-int topology_phys_to_logical_pkg(unsigned int phys_pkg)
-{
-	if (phys_pkg >= max_physical_pkg_id)
-		return -1;
-	return physical_to_logical_pkg[phys_pkg];
-}
-EXPORT_SYMBOL(topology_phys_to_logical_pkg);
-
-static void __init smp_init_package_map(struct cpuinfo_x86 *c, unsigned int cpu)
-{
-	unsigned int ncpus;
-	size_t size;
-
-	/*
-	 * Today neither Intel nor AMD support heterogenous systems. That
-	 * might change in the future....
-	 *
-	 * While ideally we'd want '* smp_num_siblings' in the below @ncpus
-	 * computation, this won't actually work since some Intel BIOSes
-	 * report inconsistent HT data when they disable HT.
-	 *
-	 * In particular, they reduce the APIC-IDs to only include the cores,
-	 * but leave the CPUID topology to say there are (2) siblings.
-	 * This means we don't know how many threads there will be until
-	 * after the APIC enumeration.
-	 *
-	 * By not including this we'll sometimes over-estimate the number of
-	 * logical packages by the amount of !present siblings, but this is
-	 * still better than MAX_LOCAL_APIC.
-	 *
-	 * We use total_cpus not nr_cpu_ids because nr_cpu_ids can be limited
-	 * on the command line leading to a similar issue as the HT disable
-	 * problem because the hyperthreads are usually enumerated after the
-	 * primary cores.
-	 */
-	ncpus = boot_cpu_data.x86_max_cores;
-	if (!ncpus) {
-		pr_warn("x86_max_cores == zero !?!?");
-		ncpus = 1;
-	}
-
-	__max_logical_packages = DIV_ROUND_UP(total_cpus, ncpus);
-	logical_packages = 0;
-
-	/*
-	 * Possibly larger than what we need as the number of apic ids per
-	 * package can be smaller than the actual used apic ids.
-	 */
-	max_physical_pkg_id = DIV_ROUND_UP(MAX_LOCAL_APIC, ncpus);
-	size = max_physical_pkg_id * sizeof(unsigned int);
-	physical_to_logical_pkg = kmalloc(size, GFP_KERNEL);
-	memset(physical_to_logical_pkg, 0xff, size);
-	size = BITS_TO_LONGS(max_physical_pkg_id) * sizeof(unsigned long);
-	physical_package_map = kzalloc(size, GFP_KERNEL);
-
-	pr_info("Max logical packages: %u\n", __max_logical_packages);
-
-	topology_update_package_map(c->phys_proc_id, cpu);
-}
-
 void __init smp_store_boot_cpu_info(void)
 {
 	int id = 0; /* CPU 0 */
@@ -390,7 +327,8 @@ void __init smp_store_boot_cpu_info(void)
 
 	*c = boot_cpu_data;
 	c->cpu_index = id;
-	smp_init_package_map(c, id);
+	topology_update_package_map(c->phys_proc_id, id);
+	c->initialized = true;
 }
 
 /*
@@ -401,13 +339,16 @@ void smp_store_cpu_info(int id)
 {
 	struct cpuinfo_x86 *c = &cpu_data(id);
 
-	*c = boot_cpu_data;
+	/* Copy boot_cpu_data only on the first bringup */
+	if (!c->initialized)
+		*c = boot_cpu_data;
 	c->cpu_index = id;
 	/*
 	 * During boot time, CPU0 has this setup already. Save the info when
 	 * bringing up AP or offlined CPU0.
 	 */
 	identify_secondary_cpu(c);
+	c->initialized = true;
 }
 
 static bool
@@ -1356,7 +1297,16 @@ void __init native_smp_prepare_boot_cpu(void)
 
 void __init native_smp_cpus_done(unsigned int max_cpus)
 {
+	int ncpus;
+
 	pr_debug("Boot done\n");
+	/*
+	 * Today neither Intel nor AMD support heterogenous systems so
+	 * extrapolate the boot cpu's data to all packages.
+	 */
+	ncpus = cpu_data(0).booted_cores * smp_num_siblings;
+	__max_logical_packages = DIV_ROUND_UP(nr_cpu_ids, ncpus);
+	pr_info("Max logical packages: %u\n", __max_logical_packages);
 
 	if (x86_has_numa_in_package)
 		set_sched_topology(x86_numa_in_package_topology);
diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c
index a63fe77b3217..676774b9bb8d 100644
--- a/arch/x86/kernel/sys_x86_64.c
+++ b/arch/x86/kernel/sys_x86_64.c
@@ -188,6 +188,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
 	if (len > TASK_SIZE)
 		return -ENOMEM;
 
+	/* No address checking. See comment at mmap_address_hint_valid() */
 	if (flags & MAP_FIXED)
 		return addr;
 
@@ -197,12 +198,15 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
 
 	/* requesting a specific address */
 	if (addr) {
-		addr = PAGE_ALIGN(addr);
+		addr &= PAGE_MASK;
+		if (!mmap_address_hint_valid(addr, len))
+			goto get_unmapped_area;
+
 		vma = find_vma(mm, addr);
-		if (TASK_SIZE - len >= addr &&
-				(!vma || addr + len <= vm_start_gap(vma)))
+		if (!vma || addr + len <= vm_start_gap(vma))
 			return addr;
 	}
+get_unmapped_area:
 
 	info.flags = VM_UNMAPPED_AREA_TOPDOWN;
 	info.length = len;
diff --git a/arch/x86/kernel/umip.c b/arch/x86/kernel/umip.c
index 6ba82be68cff..f44ce0fb3583 100644
--- a/arch/x86/kernel/umip.c
+++ b/arch/x86/kernel/umip.c
@@ -78,7 +78,60 @@
 
 #define	UMIP_INST_SGDT	0	/* 0F 01 /0 */
 #define	UMIP_INST_SIDT	1	/* 0F 01 /1 */
-#define	UMIP_INST_SMSW	3	/* 0F 01 /4 */
+#define	UMIP_INST_SMSW	2	/* 0F 01 /4 */
+#define	UMIP_INST_SLDT  3       /* 0F 00 /0 */
+#define	UMIP_INST_STR   4       /* 0F 00 /1 */
+
+const char * const umip_insns[5] = {
+	[UMIP_INST_SGDT] = "SGDT",
+	[UMIP_INST_SIDT] = "SIDT",
+	[UMIP_INST_SMSW] = "SMSW",
+	[UMIP_INST_SLDT] = "SLDT",
+	[UMIP_INST_STR] = "STR",
+};
+
+#define umip_pr_err(regs, fmt, ...) \
+	umip_printk(regs, KERN_ERR, fmt, ##__VA_ARGS__)
+#define umip_pr_warning(regs, fmt, ...) \
+	umip_printk(regs, KERN_WARNING, fmt,  ##__VA_ARGS__)
+
+/**
+ * umip_printk() - Print a rate-limited message
+ * @regs:	Register set with the context in which the warning is printed
+ * @log_level:	Kernel log level to print the message
+ * @fmt:	The text string to print
+ *
+ * Print the text contained in @fmt. The print rate is limited to bursts of 5
+ * messages every two minutes. The purpose of this customized version of
+ * printk() is to print messages when user space processes use any of the
+ * UMIP-protected instructions. Thus, the printed text is prepended with the
+ * task name and process ID number of the current task as well as the
+ * instruction and stack pointers in @regs as seen when entering kernel mode.
+ *
+ * Returns:
+ *
+ * None.
+ */
+static __printf(3, 4)
+void umip_printk(const struct pt_regs *regs, const char *log_level,
+		 const char *fmt, ...)
+{
+	/* Bursts of 5 messages every two minutes */
+	static DEFINE_RATELIMIT_STATE(ratelimit, 2 * 60 * HZ, 5);
+	struct task_struct *tsk = current;
+	struct va_format vaf;
+	va_list args;
+
+	if (!__ratelimit(&ratelimit))
+		return;
+
+	va_start(args, fmt);
+	vaf.fmt = fmt;
+	vaf.va = &args;
+	printk("%s" pr_fmt("%s[%d] ip:%lx sp:%lx: %pV"), log_level, tsk->comm,
+	       task_pid_nr(tsk), regs->ip, regs->sp, &vaf);
+	va_end(args);
+}
 
 /**
  * identify_insn() - Identify a UMIP-protected instruction
@@ -118,10 +171,16 @@ static int identify_insn(struct insn *insn)
 		default:
 			return -EINVAL;
 		}
+	} else if (insn->opcode.bytes[1] == 0x0) {
+		if (X86_MODRM_REG(insn->modrm.value) == 0)
+			return UMIP_INST_SLDT;
+		else if (X86_MODRM_REG(insn->modrm.value) == 1)
+			return UMIP_INST_STR;
+		else
+			return -EINVAL;
+	} else {
+		return -EINVAL;
 	}
-
-	/* SLDT AND STR are not emulated */
-	return -EINVAL;
 }
 
 /**
@@ -228,10 +287,8 @@ static void force_sig_info_umip_fault(void __user *addr, struct pt_regs *regs)
 	if (!(show_unhandled_signals && unhandled_signal(tsk, SIGSEGV)))
 		return;
 
-	pr_err_ratelimited("%s[%d] umip emulation segfault ip:%lx sp:%lx error:%x in %lx\n",
-			   tsk->comm, task_pid_nr(tsk), regs->ip,
-			   regs->sp, X86_PF_USER | X86_PF_WRITE,
-			   regs->ip);
+	umip_pr_err(regs, "segfault in emulation. error%x\n",
+		    X86_PF_USER | X86_PF_WRITE);
 }
 
 /**
@@ -262,15 +319,11 @@ bool fixup_umip_exception(struct pt_regs *regs)
 	unsigned char buf[MAX_INSN_SIZE];
 	void __user *uaddr;
 	struct insn insn;
-	char seg_defs;
+	int seg_defs;
 
 	if (!regs)
 		return false;
 
-	/* Do not emulate 64-bit processes. */
-	if (user_64bit_mode(regs))
-		return false;
-
 	/*
 	 * If not in user-space long mode, a custom code segment could be in
 	 * use. This is true in protected mode (if the process defined a local
@@ -322,6 +375,15 @@ bool fixup_umip_exception(struct pt_regs *regs)
 	if (umip_inst < 0)
 		return false;
 
+	umip_pr_warning(regs, "%s instruction cannot be used by applications.\n",
+			umip_insns[umip_inst]);
+
+	/* Do not emulate SLDT, STR or user long mode processes. */
+	if (umip_inst == UMIP_INST_STR || umip_inst == UMIP_INST_SLDT || user_64bit_mode(regs))
+		return false;
+
+	umip_pr_warning(regs, "For now, expensive software emulation returns the result.\n");
+
 	if (emulate_umip_insn(&insn, umip_inst, dummy_data, &dummy_data_size))
 		return false;
 
diff --git a/arch/x86/lib/insn-eval.c b/arch/x86/lib/insn-eval.c
index 35625d279458..9119d8e41f1f 100644
--- a/arch/x86/lib/insn-eval.c
+++ b/arch/x86/lib/insn-eval.c
@@ -733,11 +733,11 @@ static unsigned long get_seg_limit(struct pt_regs *regs, int seg_reg_idx)
  *
  * Returns:
  *
- * A signed 8-bit value containing the default parameters on success.
+ * An int containing ORed-in default parameters on success.
  *
  * -EINVAL on error.
  */
-char insn_get_code_seg_params(struct pt_regs *regs)
+int insn_get_code_seg_params(struct pt_regs *regs)
 {
 	struct desc_struct *desc;
 	short sel;
diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt
index 12e377184ee4..c4d55919fac1 100644
--- a/arch/x86/lib/x86-opcode-map.txt
+++ b/arch/x86/lib/x86-opcode-map.txt
@@ -896,7 +896,7 @@ EndTable
 
 GrpTable: Grp3_1
 0: TEST Eb,Ib
-1:
+1: TEST Eb,Ib
 2: NOT Eb
 3: NEG Eb
 4: MUL AL,Eb
diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c
index 8ae0000cbdb3..00b296617ca4 100644
--- a/arch/x86/mm/hugetlbpage.c
+++ b/arch/x86/mm/hugetlbpage.c
@@ -158,6 +158,7 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
 	if (len > TASK_SIZE)
 		return -ENOMEM;
 
+	/* No address checking. See comment at mmap_address_hint_valid() */
 	if (flags & MAP_FIXED) {
 		if (prepare_hugepage_range(file, addr, len))
 			return -EINVAL;
@@ -165,12 +166,16 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
 	}
 
 	if (addr) {
-		addr = ALIGN(addr, huge_page_size(h));
+		addr &= huge_page_mask(h);
+		if (!mmap_address_hint_valid(addr, len))
+			goto get_unmapped_area;
+
 		vma = find_vma(mm, addr);
-		if (TASK_SIZE - len >= addr &&
-		    (!vma || addr + len <= vm_start_gap(vma)))
+		if (!vma || addr + len <= vm_start_gap(vma))
 			return addr;
 	}
+
+get_unmapped_area:
 	if (mm->get_unmapped_area == arch_get_unmapped_area)
 		return hugetlb_get_unmapped_area_bottomup(file, addr, len,
 				pgoff, flags);
diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c
index 2b60dc6e64b1..99dfed6dfef8 100644
--- a/arch/x86/mm/kasan_init_64.c
+++ b/arch/x86/mm/kasan_init_64.c
@@ -4,12 +4,14 @@
 #include <linux/bootmem.h>
 #include <linux/kasan.h>
 #include <linux/kdebug.h>
+#include <linux/memblock.h>
 #include <linux/mm.h>
 #include <linux/sched.h>
 #include <linux/sched/task.h>
 #include <linux/vmalloc.h>
 
 #include <asm/e820/types.h>
+#include <asm/pgalloc.h>
 #include <asm/tlbflush.h>
 #include <asm/sections.h>
 #include <asm/pgtable.h>
@@ -18,7 +20,134 @@ extern struct range pfn_mapped[E820_MAX_ENTRIES];
 
 static p4d_t tmp_p4d_table[PTRS_PER_P4D] __initdata __aligned(PAGE_SIZE);
 
-static int __init map_range(struct range *range)
+static __init void *early_alloc(size_t size, int nid)
+{
+	return memblock_virt_alloc_try_nid_nopanic(size, size,
+		__pa(MAX_DMA_ADDRESS), BOOTMEM_ALLOC_ACCESSIBLE, nid);
+}
+
+static void __init kasan_populate_pmd(pmd_t *pmd, unsigned long addr,
+				      unsigned long end, int nid)
+{
+	pte_t *pte;
+
+	if (pmd_none(*pmd)) {
+		void *p;
+
+		if (boot_cpu_has(X86_FEATURE_PSE) &&
+		    ((end - addr) == PMD_SIZE) &&
+		    IS_ALIGNED(addr, PMD_SIZE)) {
+			p = early_alloc(PMD_SIZE, nid);
+			if (p && pmd_set_huge(pmd, __pa(p), PAGE_KERNEL))
+				return;
+			else if (p)
+				memblock_free(__pa(p), PMD_SIZE);
+		}
+
+		p = early_alloc(PAGE_SIZE, nid);
+		pmd_populate_kernel(&init_mm, pmd, p);
+	}
+
+	pte = pte_offset_kernel(pmd, addr);
+	do {
+		pte_t entry;
+		void *p;
+
+		if (!pte_none(*pte))
+			continue;
+
+		p = early_alloc(PAGE_SIZE, nid);
+		entry = pfn_pte(PFN_DOWN(__pa(p)), PAGE_KERNEL);
+		set_pte_at(&init_mm, addr, pte, entry);
+	} while (pte++, addr += PAGE_SIZE, addr != end);
+}
+
+static void __init kasan_populate_pud(pud_t *pud, unsigned long addr,
+				      unsigned long end, int nid)
+{
+	pmd_t *pmd;
+	unsigned long next;
+
+	if (pud_none(*pud)) {
+		void *p;
+
+		if (boot_cpu_has(X86_FEATURE_GBPAGES) &&
+		    ((end - addr) == PUD_SIZE) &&
+		    IS_ALIGNED(addr, PUD_SIZE)) {
+			p = early_alloc(PUD_SIZE, nid);
+			if (p && pud_set_huge(pud, __pa(p), PAGE_KERNEL))
+				return;
+			else if (p)
+				memblock_free(__pa(p), PUD_SIZE);
+		}
+
+		p = early_alloc(PAGE_SIZE, nid);
+		pud_populate(&init_mm, pud, p);
+	}
+
+	pmd = pmd_offset(pud, addr);
+	do {
+		next = pmd_addr_end(addr, end);
+		if (!pmd_large(*pmd))
+			kasan_populate_pmd(pmd, addr, next, nid);
+	} while (pmd++, addr = next, addr != end);
+}
+
+static void __init kasan_populate_p4d(p4d_t *p4d, unsigned long addr,
+				      unsigned long end, int nid)
+{
+	pud_t *pud;
+	unsigned long next;
+
+	if (p4d_none(*p4d)) {
+		void *p = early_alloc(PAGE_SIZE, nid);
+
+		p4d_populate(&init_mm, p4d, p);
+	}
+
+	pud = pud_offset(p4d, addr);
+	do {
+		next = pud_addr_end(addr, end);
+		if (!pud_large(*pud))
+			kasan_populate_pud(pud, addr, next, nid);
+	} while (pud++, addr = next, addr != end);
+}
+
+static void __init kasan_populate_pgd(pgd_t *pgd, unsigned long addr,
+				      unsigned long end, int nid)
+{
+	void *p;
+	p4d_t *p4d;
+	unsigned long next;
+
+	if (pgd_none(*pgd)) {
+		p = early_alloc(PAGE_SIZE, nid);
+		pgd_populate(&init_mm, pgd, p);
+	}
+
+	p4d = p4d_offset(pgd, addr);
+	do {
+		next = p4d_addr_end(addr, end);
+		kasan_populate_p4d(p4d, addr, next, nid);
+	} while (p4d++, addr = next, addr != end);
+}
+
+static void __init kasan_populate_shadow(unsigned long addr, unsigned long end,
+					 int nid)
+{
+	pgd_t *pgd;
+	unsigned long next;
+
+	addr = addr & PAGE_MASK;
+	end = round_up(end, PAGE_SIZE);
+	pgd = pgd_offset_k(addr);
+	do {
+		next = pgd_addr_end(addr, end);
+		kasan_populate_pgd(pgd, addr, next, nid);
+	} while (pgd++, addr = next, addr != end);
+}
+
+static void __init map_range(struct range *range)
 {
 	unsigned long start;
 	unsigned long end;
@@ -26,7 +155,7 @@ static int __init map_range(struct range *range)
 	start = (unsigned long)kasan_mem_to_shadow(pfn_to_kaddr(range->start));
 	end = (unsigned long)kasan_mem_to_shadow(pfn_to_kaddr(range->end));
 
-	return vmemmap_populate(start, end, NUMA_NO_NODE);
+	kasan_populate_shadow(start, end, early_pfn_to_nid(range->start));
 }
 
 static void __init clear_pgds(unsigned long start,
@@ -189,16 +318,16 @@ void __init kasan_init(void)
 		if (pfn_mapped[i].end == 0)
 			break;
 
-		if (map_range(&pfn_mapped[i]))
-			panic("kasan: unable to allocate shadow!");
+		map_range(&pfn_mapped[i]);
 	}
+
 	kasan_populate_zero_shadow(
 		kasan_mem_to_shadow((void *)PAGE_OFFSET + MAXMEM),
 		kasan_mem_to_shadow((void *)__START_KERNEL_map));
 
-	vmemmap_populate((unsigned long)kasan_mem_to_shadow(_stext),
-			(unsigned long)kasan_mem_to_shadow(_end),
-			NUMA_NO_NODE);
+	kasan_populate_shadow((unsigned long)kasan_mem_to_shadow(_stext),
+			      (unsigned long)kasan_mem_to_shadow(_end),
+			      early_pfn_to_nid(__pa(_stext)));
 
 	kasan_populate_zero_shadow(kasan_mem_to_shadow((void *)MODULES_END),
 			(void *)KASAN_SHADOW_END);
diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c
index a99679826846..155ecbac9e28 100644
--- a/arch/x86/mm/mmap.c
+++ b/arch/x86/mm/mmap.c
@@ -33,6 +33,8 @@
 #include <linux/compat.h>
 #include <asm/elf.h>
 
+#include "physaddr.h"
+
 struct va_alignment __read_mostly va_align = {
 	.flags = -1,
 };
@@ -174,3 +176,63 @@ const char *arch_vma_name(struct vm_area_struct *vma)
 		return "[mpx]";
 	return NULL;
 }
+
+/**
+ * mmap_address_hint_valid - Validate the address hint of mmap
+ * @addr:	Address hint
+ * @len:	Mapping length
+ *
+ * Check whether @addr and @addr + @len result in a valid mapping.
+ *
+ * On 32bit this only checks whether @addr + @len is <= TASK_SIZE.
+ *
+ * On 64bit with 5-level page tables another sanity check is required
+ * because mappings requested by mmap(@addr, 0) which cross the 47-bit
+ * virtual address boundary can cause the following theoretical issue:
+ *
+ *  An application calls mmap(addr, 0), i.e. without MAP_FIXED, where @addr
+ *  is below the border of the 47-bit address space and @addr + @len is
+ *  above the border.
+ *
+ *  With 4-level paging this request succeeds, but the resulting mapping
+ *  address will always be within the 47-bit virtual address space, because
+ *  the hint address does not result in a valid mapping and is
+ *  ignored. Hence applications which are not prepared to handle virtual
+ *  addresses above 47-bit work correctly.
+ *
+ *  With 5-level paging this request would be granted and result in a
+ *  mapping which crosses the border of the 47-bit virtual address
+ *  space. If the application cannot handle addresses above 47-bit this
+ *  will lead to misbehaviour and hard to diagnose failures.
+ *
+ * Therefore ignore address hints which would result in a mapping crossing
+ * the 47-bit virtual address boundary.
+ *
+ * Note, that in the same scenario with MAP_FIXED the behaviour is
+ * different. The request with @addr < 47-bit and @addr + @len > 47-bit
+ * fails on a 4-level paging machine but succeeds on a 5-level paging
+ * machine. It is reasonable to expect that an application does not rely on
+ * the failure of such a fixed mapping request, so the restriction is not
+ * applied.
+ */
+bool mmap_address_hint_valid(unsigned long addr, unsigned long len)
+{
+	if (TASK_SIZE - len < addr)
+		return false;
+
+	return (addr > DEFAULT_MAP_WINDOW) == (addr + len > DEFAULT_MAP_WINDOW);
+}
+
+/* Can we access it for direct reading/writing? Must be RAM: */
+int valid_phys_addr_range(phys_addr_t addr, size_t count)
+{
+	return addr + count <= __pa(high_memory);
+}
+
+/* Can we access it through mmap? Must be a valid physical address: */
+int valid_mmap_phys_addr_range(unsigned long pfn, size_t count)
+{
+	phys_addr_t addr = (phys_addr_t)pfn << PAGE_SHIFT;
+
+	return phys_addr_valid(addr + count - 1);
+}
diff --git a/drivers/char/mem.c b/drivers/char/mem.c
index 970e1242a282..6aefe5370e5b 100644
--- a/drivers/char/mem.c
+++ b/drivers/char/mem.c
@@ -343,6 +343,10 @@ static int mmap_mem(struct file *file, struct vm_area_struct *vma)
 	size_t size = vma->vm_end - vma->vm_start;
 	phys_addr_t offset = (phys_addr_t)vma->vm_pgoff << PAGE_SHIFT;
 
+	/* Does it even fit in phys_addr_t? */
+	if (offset >> PAGE_SHIFT != vma->vm_pgoff)
+		return -EINVAL;
+
 	/* It's illegal to wrap around the end of the physical address space. */
 	if (offset + (phys_addr_t)size - 1 < offset)
 		return -EINVAL;
diff --git a/drivers/pci/Kconfig b/drivers/pci/Kconfig
index c32a77fc8b03..99ae5e30eabe 100644
--- a/drivers/pci/Kconfig
+++ b/drivers/pci/Kconfig
@@ -71,15 +71,6 @@ config XEN_PCIDEV_FRONTEND
           The PCI device frontend driver allows the kernel to import arbitrary
           PCI devices from a PCI backend to support PCI driver domains.
 
-config HT_IRQ
-	bool "Interrupts on hypertransport devices"
-	default y
-	depends on PCI && X86_LOCAL_APIC
-	help
-	   This allows native hypertransport devices to use interrupts.
-
-	   If unsure say Y.
-
 config PCI_ATS
 	bool
 
diff --git a/drivers/pci/Makefile b/drivers/pci/Makefile
index 80adbdbcecce..ab0104e0ffac 100644
--- a/drivers/pci/Makefile
+++ b/drivers/pci/Makefile
@@ -24,9 +24,6 @@ endif
 # Build the PCI MSI interrupt support
 obj-$(CONFIG_PCI_MSI) += msi.o
 
-# Build the Hypertransport interrupt support
-obj-$(CONFIG_HT_IRQ) += htirq.o
-
 obj-$(CONFIG_PCI_ATS) += ats.o
 obj-$(CONFIG_PCI_IOV) += iov.o
 
diff --git a/drivers/pci/htirq.c b/drivers/pci/htirq.c
deleted file mode 100644
index bb88c26f5144..000000000000
--- a/drivers/pci/htirq.c
+++ /dev/null
@@ -1,135 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * File:	htirq.c
- * Purpose:	Hypertransport Interrupt Capability
- *
- * Copyright (C) 2006 Linux Networx
- * Copyright (C) Eric Biederman <ebiederman@lnxi.com>
- */
-
-#include <linux/irq.h>
-#include <linux/pci.h>
-#include <linux/spinlock.h>
-#include <linux/export.h>
-#include <linux/slab.h>
-#include <linux/htirq.h>
-
-/* Global ht irq lock.
- *
- * This is needed to serialize access to the data port in hypertransport
- * irq capability.
- *
- * With multiple simultaneous hypertransport irq devices it might pay
- * to make this more fine grained.  But start with simple, stupid, and correct.
- */
-static DEFINE_SPINLOCK(ht_irq_lock);
-
-void write_ht_irq_msg(unsigned int irq, struct ht_irq_msg *msg)
-{
-	struct ht_irq_cfg *cfg = irq_get_handler_data(irq);
-	unsigned long flags;
-
-	spin_lock_irqsave(&ht_irq_lock, flags);
-	if (cfg->msg.address_lo != msg->address_lo) {
-		pci_write_config_byte(cfg->dev, cfg->pos + 2, cfg->idx);
-		pci_write_config_dword(cfg->dev, cfg->pos + 4, msg->address_lo);
-	}
-	if (cfg->msg.address_hi != msg->address_hi) {
-		pci_write_config_byte(cfg->dev, cfg->pos + 2, cfg->idx + 1);
-		pci_write_config_dword(cfg->dev, cfg->pos + 4, msg->address_hi);
-	}
-	if (cfg->update)
-		cfg->update(cfg->dev, irq, msg);
-	spin_unlock_irqrestore(&ht_irq_lock, flags);
-	cfg->msg = *msg;
-}
-
-void fetch_ht_irq_msg(unsigned int irq, struct ht_irq_msg *msg)
-{
-	struct ht_irq_cfg *cfg = irq_get_handler_data(irq);
-
-	*msg = cfg->msg;
-}
-
-void mask_ht_irq(struct irq_data *data)
-{
-	struct ht_irq_cfg *cfg = irq_data_get_irq_handler_data(data);
-	struct ht_irq_msg msg = cfg->msg;
-
-	msg.address_lo |= 1;
-	write_ht_irq_msg(data->irq, &msg);
-}
-
-void unmask_ht_irq(struct irq_data *data)
-{
-	struct ht_irq_cfg *cfg = irq_data_get_irq_handler_data(data);
-	struct ht_irq_msg msg = cfg->msg;
-
-	msg.address_lo &= ~1;
-	write_ht_irq_msg(data->irq, &msg);
-}
-
-/**
- * __ht_create_irq - create an irq and attach it to a device.
- * @dev: The hypertransport device to find the irq capability on.
- * @idx: Which of the possible irqs to attach to.
- * @update: Function to be called when changing the htirq message
- *
- * The irq number of the new irq or a negative error value is returned.
- */
-int __ht_create_irq(struct pci_dev *dev, int idx, ht_irq_update_t *update)
-{
-	int max_irq, pos, irq;
-	unsigned long flags;
-	u32 data;
-
-	pos = pci_find_ht_capability(dev, HT_CAPTYPE_IRQ);
-	if (!pos)
-		return -EINVAL;
-
-	/* Verify the idx I want to use is in range */
-	spin_lock_irqsave(&ht_irq_lock, flags);
-	pci_write_config_byte(dev, pos + 2, 1);
-	pci_read_config_dword(dev, pos + 4, &data);
-	spin_unlock_irqrestore(&ht_irq_lock, flags);
-
-	max_irq = (data >> 16) & 0xff;
-	if (idx > max_irq)
-		return -EINVAL;
-
-	irq = arch_setup_ht_irq(idx, pos, dev, update);
-	if (irq > 0)
-		dev_dbg(&dev->dev, "irq %d for HT\n", irq);
-
-	return irq;
-}
-EXPORT_SYMBOL(__ht_create_irq);
-
-/**
- * ht_create_irq - create an irq and attach it to a device.
- * @dev: The hypertransport device to find the irq capability on.
- * @idx: Which of the possible irqs to attach to.
- *
- * ht_create_irq needs to be called for all hypertransport devices
- * that generate irqs.
- *
- * The irq number of the new irq or a negative error value is returned.
- */
-int ht_create_irq(struct pci_dev *dev, int idx)
-{
-	return __ht_create_irq(dev, idx, NULL);
-}
-EXPORT_SYMBOL(ht_create_irq);
-
-/**
- * ht_destroy_irq - destroy an irq created with ht_create_irq
- * @irq: irq to be destroyed
- *
- * This reverses ht_create_irq removing the specified irq from
- * existence.  The irq should be free before this happens.
- */
-void ht_destroy_irq(unsigned int irq)
-{
-	arch_teardown_ht_irq(irq);
-}
-EXPORT_SYMBOL(ht_destroy_irq);
diff --git a/include/linux/htirq.h b/include/linux/htirq.h
deleted file mode 100644
index 127c39d815ba..000000000000
--- a/include/linux/htirq.h
+++ /dev/null
@@ -1,39 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef LINUX_HTIRQ_H
-#define LINUX_HTIRQ_H
-
-struct pci_dev;
-struct irq_data;
-
-struct ht_irq_msg {
-	u32	address_lo;	/* low 32 bits of the ht irq message */
-	u32	address_hi;	/* high 32 bits of the it irq message */
-};
-
-typedef void (ht_irq_update_t)(struct pci_dev *dev, int irq,
-			       struct ht_irq_msg *msg);
-
-struct ht_irq_cfg {
-	struct pci_dev *dev;
-	 /* Update callback used to cope with buggy hardware */
-	ht_irq_update_t *update;
-	unsigned pos;
-	unsigned idx;
-	struct ht_irq_msg msg;
-};
-
-/* Helper functions.. */
-void fetch_ht_irq_msg(unsigned int irq, struct ht_irq_msg *msg);
-void write_ht_irq_msg(unsigned int irq, struct ht_irq_msg *msg);
-void mask_ht_irq(struct irq_data *data);
-void unmask_ht_irq(struct irq_data *data);
-
-/* The arch hook for getting things started */
-int arch_setup_ht_irq(int idx, int pos, struct pci_dev *dev,
-		      ht_irq_update_t *update);
-void arch_teardown_ht_irq(unsigned int irq);
-
-/* For drivers of buggy hardware */
-int __ht_create_irq(struct pci_dev *dev, int idx, ht_irq_update_t *update);
-
-#endif /* LINUX_HTIRQ_H */
diff --git a/include/linux/pci.h b/include/linux/pci.h
index d16a7c037ec0..16287684dfe8 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1484,12 +1484,6 @@ static inline void pcie_set_ecrc_checking(struct pci_dev *dev) { }
 static inline void pcie_ecrc_get_policy(char *str) { }
 #endif
 
-#ifdef CONFIG_HT_IRQ
-/* The functions a driver should call */
-int  ht_create_irq(struct pci_dev *dev, int idx);
-void ht_destroy_irq(unsigned int irq);
-#endif /* CONFIG_HT_IRQ */
-
 #ifdef CONFIG_PCI_ATS
 /* Address Translation Service */
 void pci_ats_init(struct pci_dev *dev);
diff --git a/tools/testing/selftests/x86/5lvl.c b/tools/testing/selftests/x86/5lvl.c
new file mode 100644
index 000000000000..2eafdcd4c2b3
--- /dev/null
+++ b/tools/testing/selftests/x86/5lvl.c
@@ -0,0 +1,177 @@
+#include <stdio.h>
+#include <sys/mman.h>
+
+#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
+
+#define PAGE_SIZE	4096
+#define LOW_ADDR	((void *) (1UL << 30))
+#define HIGH_ADDR	((void *) (1UL << 50))
+
+struct testcase {
+	void *addr;
+	unsigned long size;
+	unsigned long flags;
+	const char *msg;
+	unsigned int low_addr_required:1;
+	unsigned int keep_mapped:1;
+};
+
+static struct testcase testcases[] = {
+	{
+		.addr = NULL,
+		.size = 2 * PAGE_SIZE,
+		.flags = MAP_PRIVATE | MAP_ANONYMOUS,
+		.msg = "mmap(NULL)",
+		.low_addr_required = 1,
+	},
+	{
+		.addr = LOW_ADDR,
+		.size = 2 * PAGE_SIZE,
+		.flags = MAP_PRIVATE | MAP_ANONYMOUS,
+		.msg = "mmap(LOW_ADDR)",
+		.low_addr_required = 1,
+	},
+	{
+		.addr = HIGH_ADDR,
+		.size = 2 * PAGE_SIZE,
+		.flags = MAP_PRIVATE | MAP_ANONYMOUS,
+		.msg = "mmap(HIGH_ADDR)",
+		.keep_mapped = 1,
+	},
+	{
+		.addr = HIGH_ADDR,
+		.size = 2 * PAGE_SIZE,
+		.flags = MAP_PRIVATE | MAP_ANONYMOUS,
+		.msg = "mmap(HIGH_ADDR) again",
+		.keep_mapped = 1,
+	},
+	{
+		.addr = HIGH_ADDR,
+		.size = 2 * PAGE_SIZE,
+		.flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED,
+		.msg = "mmap(HIGH_ADDR, MAP_FIXED)",
+	},
+	{
+		.addr = (void*) -1,
+		.size = 2 * PAGE_SIZE,
+		.flags = MAP_PRIVATE | MAP_ANONYMOUS,
+		.msg = "mmap(-1)",
+		.keep_mapped = 1,
+	},
+	{
+		.addr = (void*) -1,
+		.size = 2 * PAGE_SIZE,
+		.flags = MAP_PRIVATE | MAP_ANONYMOUS,
+		.msg = "mmap(-1) again",
+	},
+	{
+		.addr = (void *)((1UL << 47) - PAGE_SIZE),
+		.size = 2 * PAGE_SIZE,
+		.flags = MAP_PRIVATE | MAP_ANONYMOUS,
+		.msg = "mmap((1UL << 47), 2 * PAGE_SIZE)",
+		.low_addr_required = 1,
+		.keep_mapped = 1,
+	},
+	{
+		.addr = (void *)((1UL << 47) - PAGE_SIZE / 2),
+		.size = 2 * PAGE_SIZE,
+		.flags = MAP_PRIVATE | MAP_ANONYMOUS,
+		.msg = "mmap((1UL << 47), 2 * PAGE_SIZE / 2)",
+		.low_addr_required = 1,
+		.keep_mapped = 1,
+	},
+	{
+		.addr = (void *)((1UL << 47) - PAGE_SIZE),
+		.size = 2 * PAGE_SIZE,
+		.flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED,
+		.msg = "mmap((1UL << 47) - PAGE_SIZE, 2 * PAGE_SIZE, MAP_FIXED)",
+	},
+	{
+		.addr = NULL,
+		.size = 2UL << 20,
+		.flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS,
+		.msg = "mmap(NULL, MAP_HUGETLB)",
+		.low_addr_required = 1,
+	},
+	{
+		.addr = LOW_ADDR,
+		.size = 2UL << 20,
+		.flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS,
+		.msg = "mmap(LOW_ADDR, MAP_HUGETLB)",
+		.low_addr_required = 1,
+	},
+	{
+		.addr = HIGH_ADDR,
+		.size = 2UL << 20,
+		.flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS,
+		.msg = "mmap(HIGH_ADDR, MAP_HUGETLB)",
+		.keep_mapped = 1,
+	},
+	{
+		.addr = HIGH_ADDR,
+		.size = 2UL << 20,
+		.flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS,
+		.msg = "mmap(HIGH_ADDR, MAP_HUGETLB) again",
+		.keep_mapped = 1,
+	},
+	{
+		.addr = HIGH_ADDR,
+		.size = 2UL << 20,
+		.flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED,
+		.msg = "mmap(HIGH_ADDR, MAP_FIXED | MAP_HUGETLB)",
+	},
+	{
+		.addr = (void*) -1,
+		.size = 2UL << 20,
+		.flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS,
+		.msg = "mmap(-1, MAP_HUGETLB)",
+		.keep_mapped = 1,
+	},
+	{
+		.addr = (void*) -1,
+		.size = 2UL << 20,
+		.flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS,
+		.msg = "mmap(-1, MAP_HUGETLB) again",
+	},
+	{
+		.addr = (void *)((1UL << 47) - PAGE_SIZE),
+		.size = 4UL << 20,
+		.flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS,
+		.msg = "mmap((1UL << 47), 4UL << 20, MAP_HUGETLB)",
+		.low_addr_required = 1,
+		.keep_mapped = 1,
+	},
+	{
+		.addr = (void *)((1UL << 47) - (2UL << 20)),
+		.size = 4UL << 20,
+		.flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED,
+		.msg = "mmap((1UL << 47) - (2UL << 20), 4UL << 20, MAP_FIXED | MAP_HUGETLB)",
+	},
+};
+
+int main(int argc, char **argv)
+{
+	int i;
+	void *p;
+
+	for (i = 0; i < ARRAY_SIZE(testcases); i++) {
+		struct testcase *t = testcases + i;
+
+		p = mmap(t->addr, t->size, PROT_NONE, t->flags, -1, 0);
+
+		printf("%s: %p - ", t->msg, p);
+
+		if (p == MAP_FAILED) {
+			printf("FAILED\n");
+			continue;
+		}
+
+		if (t->low_addr_required && p >= (void *)(1UL << 47))
+			printf("FAILED\n");
+		else
+			printf("OK\n");
+		if (!t->keep_mapped)
+			munmap(p, t->size);
+	}
+	return 0;
+}
diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile
index 7b1adeee4b0f..939a337128db 100644
--- a/tools/testing/selftests/x86/Makefile
+++ b/tools/testing/selftests/x86/Makefile
@@ -11,7 +11,7 @@ TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt ptrace_sysc
 TARGETS_C_32BIT_ONLY := entry_from_vm86 syscall_arg_fault test_syscall_vdso unwind_vdso \
 			test_FCMOV test_FCOMI test_FISTTP \
 			vdso_restorer
-TARGETS_C_64BIT_ONLY := fsgsbase sysret_rip
+TARGETS_C_64BIT_ONLY := fsgsbase sysret_rip 5lvl
 
 TARGETS_C_32BIT_ALL := $(TARGETS_C_BOTHBITS) $(TARGETS_C_32BIT_ONLY)
 TARGETS_C_64BIT_ALL := $(TARGETS_C_BOTHBITS) $(TARGETS_C_64BIT_ONLY)
diff --git a/tools/testing/selftests/x86/mpx-hw.h b/tools/testing/selftests/x86/mpx-hw.h
index 3f0093911f03..d1b61ab870f8 100644
--- a/tools/testing/selftests/x86/mpx-hw.h
+++ b/tools/testing/selftests/x86/mpx-hw.h
@@ -52,14 +52,14 @@
 struct mpx_bd_entry {
 	union {
 		char x[MPX_BOUNDS_DIR_ENTRY_SIZE_BYTES];
-		void *contents[1];
+		void *contents[0];
 	};
 } __attribute__((packed));
 
 struct mpx_bt_entry {
 	union {
 		char x[MPX_BOUNDS_TABLE_ENTRY_SIZE_BYTES];
-		unsigned long contents[1];
+		unsigned long contents[0];
 	};
 } __attribute__((packed));
 
diff --git a/tools/testing/selftests/x86/pkey-helpers.h b/tools/testing/selftests/x86/pkey-helpers.h
index 3818f25391c2..b3cb7670e026 100644
--- a/tools/testing/selftests/x86/pkey-helpers.h
+++ b/tools/testing/selftests/x86/pkey-helpers.h
@@ -30,6 +30,7 @@ static inline void sigsafe_printf(const char *format, ...)
 	if (!dprint_in_signal) {
 		vprintf(format, ap);
 	} else {
+		int ret;
 		int len = vsnprintf(dprint_in_signal_buffer,
 				    DPRINT_IN_SIGNAL_BUF_SIZE,
 				    format, ap);
@@ -39,7 +40,9 @@ static inline void sigsafe_printf(const char *format, ...)
 		 */
 		if (len > DPRINT_IN_SIGNAL_BUF_SIZE)
 			len = DPRINT_IN_SIGNAL_BUF_SIZE;
-		write(1, dprint_in_signal_buffer, len);
+		ret = write(1, dprint_in_signal_buffer, len);
+		if (ret < 0)
+			abort();
 	}
 	va_end(ap);
 }
diff --git a/tools/testing/selftests/x86/protection_keys.c b/tools/testing/selftests/x86/protection_keys.c
index 7a1cc0e56d2d..bc1b0735bb50 100644
--- a/tools/testing/selftests/x86/protection_keys.c
+++ b/tools/testing/selftests/x86/protection_keys.c
@@ -250,7 +250,7 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext)
 	unsigned long ip;
 	char *fpregs;
 	u32 *pkru_ptr;
-	u64 si_pkey;
+	u64 siginfo_pkey;
 	u32 *si_pkey_ptr;
 	int pkru_offset;
 	fpregset_t fpregset;
@@ -292,9 +292,9 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext)
 	si_pkey_ptr = (u32 *)(((u8 *)si) + si_pkey_offset);
 	dprintf1("si_pkey_ptr: %p\n", si_pkey_ptr);
 	dump_mem(si_pkey_ptr - 8, 24);
-	si_pkey = *si_pkey_ptr;
-	pkey_assert(si_pkey < NR_PKEYS);
-	last_si_pkey = si_pkey;
+	siginfo_pkey = *si_pkey_ptr;
+	pkey_assert(siginfo_pkey < NR_PKEYS);
+	last_si_pkey = siginfo_pkey;
 
 	if ((si->si_code == SEGV_MAPERR) ||
 	    (si->si_code == SEGV_ACCERR) ||
@@ -306,7 +306,7 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext)
 	dprintf1("signal pkru from xsave: %08x\n", *pkru_ptr);
 	/* need __rdpkru() version so we do not do shadow_pkru checking */
 	dprintf1("signal pkru from  pkru: %08x\n", __rdpkru());
-	dprintf1("si_pkey from siginfo: %jx\n", si_pkey);
+	dprintf1("pkey from siginfo: %jx\n", siginfo_pkey);
 	*(u64 *)pkru_ptr = 0x00000000;
 	dprintf1("WARNING: set PRKU=0 to allow faulting instruction to continue\n");
 	pkru_faults++;

^ permalink raw reply	[flat|nested] 539+ messages in thread

* [GIT PULL] x86 fixes
@ 2017-11-05 14:46 Ingo Molnar
  0 siblings, 0 replies; 539+ messages in thread
From: Ingo Molnar @ 2017-11-05 14:46 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Thomas Gleixner, H. Peter Anvin, Borislav Petkov,
	Peter Zijlstra, Andrew Morton, Josh Poimboeuf

Linus,

Please pull the latest x86-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus

   # HEAD: eda9cec4c9a12208a6f69fbe68f72a6311d50032 x86/module: Detect and skip invalid relocations

Two fixes:

 - A PCID related revert that fixes power management and performance regressions.

 - The module loader robustization and sanity check commit is rather fresh, but it
   looked like a good idea to apply because of the hidden data corruption problem
   such invalid modules could cause.


  out-of-topic modifications in x86-urgent-for-linus:
  -----------------------------------------------------
  arch/ia64/include/asm/acpi.h       # 675357362aeb: Revert "x86/mm: Stop calling
  drivers/acpi/processor_idle.c      # 675357362aeb: Revert "x86/mm: Stop calling
  drivers/idle/intel_idle.c          # 675357362aeb: Revert "x86/mm: Stop calling

 Thanks,

	Ingo

------------------>
Andy Lutomirski (1):
      Revert "x86/mm: Stop calling leave_mm() in idle code"

Josh Poimboeuf (1):
      x86/module: Detect and skip invalid relocations


 arch/ia64/include/asm/acpi.h  |  2 ++
 arch/x86/include/asm/acpi.h   |  2 ++
 arch/x86/kernel/module.c      | 13 +++++++++++++
 arch/x86/mm/tlb.c             | 17 ++++++++++++++---
 drivers/acpi/processor_idle.c |  2 ++
 drivers/idle/intel_idle.c     |  9 +++++----
 6 files changed, 38 insertions(+), 7 deletions(-)

diff --git a/arch/ia64/include/asm/acpi.h b/arch/ia64/include/asm/acpi.h
index c86a947f5368..a3d0211970e9 100644
--- a/arch/ia64/include/asm/acpi.h
+++ b/arch/ia64/include/asm/acpi.h
@@ -112,6 +112,8 @@ static inline void arch_acpi_set_pdc_bits(u32 *buf)
 	buf[2] |= ACPI_PDC_EST_CAPABILITY_SMP;
 }
 
+#define acpi_unlazy_tlb(x)
+
 #ifdef CONFIG_ACPI_NUMA
 extern cpumask_t early_cpu_possible_map;
 #define for_each_possible_early_cpu(cpu)  \
diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h
index 72d867f6b518..8d0ec9df1cbe 100644
--- a/arch/x86/include/asm/acpi.h
+++ b/arch/x86/include/asm/acpi.h
@@ -150,6 +150,8 @@ static inline void disable_acpi(void) { }
 extern int x86_acpi_numa_init(void);
 #endif /* CONFIG_ACPI_NUMA */
 
+#define acpi_unlazy_tlb(x)	leave_mm(x)
+
 #ifdef CONFIG_ACPI_APEI
 static inline pgprot_t arch_apei_get_mem_attribute(phys_addr_t addr)
 {
diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c
index 62e7d70aadd5..da0c160e5589 100644
--- a/arch/x86/kernel/module.c
+++ b/arch/x86/kernel/module.c
@@ -172,19 +172,27 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
 		case R_X86_64_NONE:
 			break;
 		case R_X86_64_64:
+			if (*(u64 *)loc != 0)
+				goto invalid_relocation;
 			*(u64 *)loc = val;
 			break;
 		case R_X86_64_32:
+			if (*(u32 *)loc != 0)
+				goto invalid_relocation;
 			*(u32 *)loc = val;
 			if (val != *(u32 *)loc)
 				goto overflow;
 			break;
 		case R_X86_64_32S:
+			if (*(s32 *)loc != 0)
+				goto invalid_relocation;
 			*(s32 *)loc = val;
 			if ((s64)val != *(s32 *)loc)
 				goto overflow;
 			break;
 		case R_X86_64_PC32:
+			if (*(u32 *)loc != 0)
+				goto invalid_relocation;
 			val -= (u64)loc;
 			*(u32 *)loc = val;
 #if 0
@@ -200,6 +208,11 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
 	}
 	return 0;
 
+invalid_relocation:
+	pr_err("x86/modules: Skipping invalid relocation target, existing value is nonzero for type %d, loc %p, val %Lx\n",
+	       (int)ELF64_R_TYPE(rel[i].r_info), loc, val);
+	return -ENOEXEC;
+
 overflow:
 	pr_err("overflow in relocation type %d val %Lx\n",
 	       (int)ELF64_R_TYPE(rel[i].r_info), val);
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 0f3d0cea4d00..3118392cdf75 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -85,6 +85,7 @@ void leave_mm(int cpu)
 
 	switch_mm(NULL, &init_mm, NULL);
 }
+EXPORT_SYMBOL_GPL(leave_mm);
 
 void switch_mm(struct mm_struct *prev, struct mm_struct *next,
 	       struct task_struct *tsk)
@@ -195,12 +196,22 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
 			this_cpu_write(cpu_tlbstate.ctxs[new_asid].ctx_id, next->context.ctx_id);
 			this_cpu_write(cpu_tlbstate.ctxs[new_asid].tlb_gen, next_tlb_gen);
 			write_cr3(build_cr3(next, new_asid));
-			trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH,
-					TLB_FLUSH_ALL);
+
+			/*
+			 * NB: This gets called via leave_mm() in the idle path
+			 * where RCU functions differently.  Tracing normally
+			 * uses RCU, so we need to use the _rcuidle variant.
+			 *
+			 * (There is no good reason for this.  The idle code should
+			 *  be rearranged to call this before rcu_idle_enter().)
+			 */
+			trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
 		} else {
 			/* The new ASID is already up to date. */
 			write_cr3(build_cr3_noflush(next, new_asid));
-			trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, 0);
+
+			/* See above wrt _rcuidle. */
+			trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, 0);
 		}
 
 		this_cpu_write(cpu_tlbstate.loaded_mm, next);
diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c
index 2736e25e9dc6..d50a7b6ccddd 100644
--- a/drivers/acpi/processor_idle.c
+++ b/drivers/acpi/processor_idle.c
@@ -710,6 +710,8 @@ static DEFINE_RAW_SPINLOCK(c3_lock);
 static void acpi_idle_enter_bm(struct acpi_processor *pr,
 			       struct acpi_processor_cx *cx, bool timer_bc)
 {
+	acpi_unlazy_tlb(smp_processor_id());
+
 	/*
 	 * Must be done before busmaster disable as we might need to
 	 * access HPET !
diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c
index 5dc7ea4b6bc4..f0b06b14e782 100644
--- a/drivers/idle/intel_idle.c
+++ b/drivers/idle/intel_idle.c
@@ -913,15 +913,16 @@ static __cpuidle int intel_idle(struct cpuidle_device *dev,
 	struct cpuidle_state *state = &drv->states[index];
 	unsigned long eax = flg2MWAIT(state->flags);
 	unsigned int cstate;
+	int cpu = smp_processor_id();
 
 	cstate = (((eax) >> MWAIT_SUBSTATE_SIZE) & MWAIT_CSTATE_MASK) + 1;
 
 	/*
-	 * NB: if CPUIDLE_FLAG_TLB_FLUSHED is set, this idle transition
-	 * will probably flush the TLB.  It's not guaranteed to flush
-	 * the TLB, though, so it's not clear that we can do anything
-	 * useful with this knowledge.
+	 * leave_mm() to avoid costly and often unnecessary wakeups
+	 * for flushing the user TLB's associated with the active mm.
 	 */
+	if (state->flags & CPUIDLE_FLAG_TLB_FLUSHED)
+		leave_mm(cpu);
 
 	if (!(lapic_timer_reliable_states & (1 << (cstate))))
 		tick_broadcast_enter();

^ permalink raw reply	[flat|nested] 539+ messages in thread

* [GIT PULL] x86 fixes
@ 2017-10-27 19:24 Ingo Molnar
  0 siblings, 0 replies; 539+ messages in thread
From: Ingo Molnar @ 2017-10-27 19:24 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Thomas Gleixner, H. Peter Anvin, Peter Zijlstra,
	Andrew Morton

Linus,

Please pull the latest x86-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus

   # HEAD: 90edaac62729d3b9cbb97756261a0049a7fdd6a0 Revert "x86/mm: Limit mmap() of /dev/mem to valid physical addresses"

Misc fixes:

 - revert a /dev/mem restriction change that crashes with certain boot parameters
 - an AMD erratum fix for cases where the BIOS doesn't apply it
 - fix unwinder debuginfo
 - improve ORC unwinder warning printouts

 Thanks,

	Ingo

------------------>
Borislav Petkov (1):
      x86/cpu/AMD: Apply the Erratum 688 fix when the BIOS doesn't

Ingo Molnar (1):
      Revert "x86/mm: Limit mmap() of /dev/mem to valid physical addresses"

Josh Poimboeuf (2):
      x86/entry: Fix idtentry unwind hint
      x86/unwind: Show function name+offset in ORC error messages


 arch/x86/entry/entry_64.S    |  2 +-
 arch/x86/include/asm/io.h    |  4 ----
 arch/x86/kernel/amd_nb.c     | 41 +++++++++++++++++++++++++++++++++++++++++
 arch/x86/kernel/unwind_orc.c | 29 +++++++++++++++--------------
 arch/x86/mm/mmap.c           | 12 ------------
 5 files changed, 57 insertions(+), 31 deletions(-)

diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 49167258d587..f6cdb7a1455e 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -808,7 +808,7 @@ apicinterrupt IRQ_WORK_VECTOR			irq_work_interrupt		smp_irq_work_interrupt
 
 .macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1
 ENTRY(\sym)
-	UNWIND_HINT_IRET_REGS offset=8
+	UNWIND_HINT_IRET_REGS offset=\has_error_code*8
 
 	/* Sanity check */
 	.if \shift_ist != -1 && \paranoid == 0
diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h
index 322d25ae23ab..c40a95c33bb8 100644
--- a/arch/x86/include/asm/io.h
+++ b/arch/x86/include/asm/io.h
@@ -110,10 +110,6 @@ build_mmio_write(__writeq, "q", unsigned long, "r", )
 
 #endif
 
-#define ARCH_HAS_VALID_PHYS_ADDR_RANGE
-extern int valid_phys_addr_range(phys_addr_t addr, size_t size);
-extern int valid_mmap_phys_addr_range(unsigned long pfn, size_t size);
-
 /**
  *	virt_to_phys	-	map virtual addresses to physical
  *	@address: address to remap
diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c
index 458da8509b75..6db28f17ff28 100644
--- a/arch/x86/kernel/amd_nb.c
+++ b/arch/x86/kernel/amd_nb.c
@@ -27,6 +27,8 @@ static const struct pci_device_id amd_root_ids[] = {
 	{}
 };
 
+#define PCI_DEVICE_ID_AMD_CNB17H_F4     0x1704
+
 const struct pci_device_id amd_nb_misc_ids[] = {
 	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB_MISC) },
 	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_MISC) },
@@ -37,6 +39,7 @@ const struct pci_device_id amd_nb_misc_ids[] = {
 	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_NB_F3) },
 	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_M30H_NB_F3) },
 	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_DF_F3) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CNB17H_F3) },
 	{}
 };
 EXPORT_SYMBOL_GPL(amd_nb_misc_ids);
@@ -48,6 +51,7 @@ static const struct pci_device_id amd_nb_link_ids[] = {
 	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_NB_F4) },
 	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_M30H_NB_F4) },
 	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_DF_F4) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CNB17H_F4) },
 	{}
 };
 
@@ -402,11 +406,48 @@ void amd_flush_garts(void)
 }
 EXPORT_SYMBOL_GPL(amd_flush_garts);
 
+static void __fix_erratum_688(void *info)
+{
+#define MSR_AMD64_IC_CFG 0xC0011021
+
+	msr_set_bit(MSR_AMD64_IC_CFG, 3);
+	msr_set_bit(MSR_AMD64_IC_CFG, 14);
+}
+
+/* Apply erratum 688 fix so machines without a BIOS fix work. */
+static __init void fix_erratum_688(void)
+{
+	struct pci_dev *F4;
+	u32 val;
+
+	if (boot_cpu_data.x86 != 0x14)
+		return;
+
+	if (!amd_northbridges.num)
+		return;
+
+	F4 = node_to_amd_nb(0)->link;
+	if (!F4)
+		return;
+
+	if (pci_read_config_dword(F4, 0x164, &val))
+		return;
+
+	if (val & BIT(2))
+		return;
+
+	on_each_cpu(__fix_erratum_688, NULL, 0);
+
+	pr_info("x86/cpu/AMD: CPU erratum 688 worked around\n");
+}
+
 static __init int init_amd_nbs(void)
 {
 	amd_cache_northbridges();
 	amd_cache_gart();
 
+	fix_erratum_688();
+
 	return 0;
 }
 
diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c
index 570b70d3f604..b95007e7c1b3 100644
--- a/arch/x86/kernel/unwind_orc.c
+++ b/arch/x86/kernel/unwind_orc.c
@@ -86,8 +86,8 @@ static struct orc_entry *orc_find(unsigned long ip)
 		idx = (ip - LOOKUP_START_IP) / LOOKUP_BLOCK_SIZE;
 
 		if (unlikely((idx >= lookup_num_blocks-1))) {
-			orc_warn("WARNING: bad lookup idx: idx=%u num=%u ip=%lx\n",
-				 idx, lookup_num_blocks, ip);
+			orc_warn("WARNING: bad lookup idx: idx=%u num=%u ip=%pB\n",
+				 idx, lookup_num_blocks, (void *)ip);
 			return NULL;
 		}
 
@@ -96,8 +96,8 @@ static struct orc_entry *orc_find(unsigned long ip)
 
 		if (unlikely((__start_orc_unwind + start >= __stop_orc_unwind) ||
 			     (__start_orc_unwind + stop > __stop_orc_unwind))) {
-			orc_warn("WARNING: bad lookup value: idx=%u num=%u start=%u stop=%u ip=%lx\n",
-				 idx, lookup_num_blocks, start, stop, ip);
+			orc_warn("WARNING: bad lookup value: idx=%u num=%u start=%u stop=%u ip=%pB\n",
+				 idx, lookup_num_blocks, start, stop, (void *)ip);
 			return NULL;
 		}
 
@@ -373,7 +373,7 @@ bool unwind_next_frame(struct unwind_state *state)
 
 	case ORC_REG_R10:
 		if (!state->regs || !state->full_regs) {
-			orc_warn("missing regs for base reg R10 at ip %p\n",
+			orc_warn("missing regs for base reg R10 at ip %pB\n",
 				 (void *)state->ip);
 			goto done;
 		}
@@ -382,7 +382,7 @@ bool unwind_next_frame(struct unwind_state *state)
 
 	case ORC_REG_R13:
 		if (!state->regs || !state->full_regs) {
-			orc_warn("missing regs for base reg R13 at ip %p\n",
+			orc_warn("missing regs for base reg R13 at ip %pB\n",
 				 (void *)state->ip);
 			goto done;
 		}
@@ -391,7 +391,7 @@ bool unwind_next_frame(struct unwind_state *state)
 
 	case ORC_REG_DI:
 		if (!state->regs || !state->full_regs) {
-			orc_warn("missing regs for base reg DI at ip %p\n",
+			orc_warn("missing regs for base reg DI at ip %pB\n",
 				 (void *)state->ip);
 			goto done;
 		}
@@ -400,7 +400,7 @@ bool unwind_next_frame(struct unwind_state *state)
 
 	case ORC_REG_DX:
 		if (!state->regs || !state->full_regs) {
-			orc_warn("missing regs for base reg DX at ip %p\n",
+			orc_warn("missing regs for base reg DX at ip %pB\n",
 				 (void *)state->ip);
 			goto done;
 		}
@@ -408,7 +408,7 @@ bool unwind_next_frame(struct unwind_state *state)
 		break;
 
 	default:
-		orc_warn("unknown SP base reg %d for ip %p\n",
+		orc_warn("unknown SP base reg %d for ip %pB\n",
 			 orc->sp_reg, (void *)state->ip);
 		goto done;
 	}
@@ -436,7 +436,7 @@ bool unwind_next_frame(struct unwind_state *state)
 
 	case ORC_TYPE_REGS:
 		if (!deref_stack_regs(state, sp, &state->ip, &state->sp, true)) {
-			orc_warn("can't dereference registers at %p for ip %p\n",
+			orc_warn("can't dereference registers at %p for ip %pB\n",
 				 (void *)sp, (void *)orig_ip);
 			goto done;
 		}
@@ -448,7 +448,7 @@ bool unwind_next_frame(struct unwind_state *state)
 
 	case ORC_TYPE_REGS_IRET:
 		if (!deref_stack_regs(state, sp, &state->ip, &state->sp, false)) {
-			orc_warn("can't dereference iret registers at %p for ip %p\n",
+			orc_warn("can't dereference iret registers at %p for ip %pB\n",
 				 (void *)sp, (void *)orig_ip);
 			goto done;
 		}
@@ -465,7 +465,8 @@ bool unwind_next_frame(struct unwind_state *state)
 		break;
 
 	default:
-		orc_warn("unknown .orc_unwind entry type %d\n", orc->type);
+		orc_warn("unknown .orc_unwind entry type %d for ip %pB\n",
+			 orc->type, (void *)orig_ip);
 		break;
 	}
 
@@ -487,7 +488,7 @@ bool unwind_next_frame(struct unwind_state *state)
 		break;
 
 	default:
-		orc_warn("unknown BP base reg %d for ip %p\n",
+		orc_warn("unknown BP base reg %d for ip %pB\n",
 			 orc->bp_reg, (void *)orig_ip);
 		goto done;
 	}
@@ -496,7 +497,7 @@ bool unwind_next_frame(struct unwind_state *state)
 	if (state->stack_info.type == prev_type &&
 	    on_stack(&state->stack_info, (void *)state->sp, sizeof(long)) &&
 	    state->sp <= prev_sp) {
-		orc_warn("stack going in the wrong direction? ip=%p\n",
+		orc_warn("stack going in the wrong direction? ip=%pB\n",
 			 (void *)orig_ip);
 		goto done;
 	}
diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c
index 320c6237e1d1..a99679826846 100644
--- a/arch/x86/mm/mmap.c
+++ b/arch/x86/mm/mmap.c
@@ -174,15 +174,3 @@ const char *arch_vma_name(struct vm_area_struct *vma)
 		return "[mpx]";
 	return NULL;
 }
-
-int valid_phys_addr_range(phys_addr_t addr, size_t count)
-{
-	return addr + count <= __pa(high_memory);
-}
-
-int valid_mmap_phys_addr_range(unsigned long pfn, size_t count)
-{
-	phys_addr_t addr = (phys_addr_t)pfn << PAGE_SHIFT;
-
-	return valid_phys_addr_range(addr, count);
-}

^ permalink raw reply	[flat|nested] 539+ messages in thread

* [GIT PULL] x86 fixes
@ 2017-10-14 16:16 Ingo Molnar
  0 siblings, 0 replies; 539+ messages in thread
From: Ingo Molnar @ 2017-10-14 16:16 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Thomas Gleixner, H. Peter Anvin, Peter Zijlstra,
	Andrew Morton

Linus,

Please pull the latest x86-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus

   # HEAD: 1f161f67a272cc4f29f27934dd3f74cb657eb5c4 x86/microcode: Do the family check first

A landry list of fixes:

 - fix reboot breakage on some PCID-enabled system
 - fix crashes/hangs on some PCID-enabled systems
 - fix microcode loading on certain older CPUs
 - various unwinder fixes
 - extend an APIC quirk to more hardware systems and disable APIC related warning 
   on virtualized systems
 - various Hyper-V fixes
 - a macro definition robustness fix
 - remove jprobes IRQ disabling
 - various mem-encryption fixes

 Thanks,

	Ingo

------------------>
Andy Lutomirski (2):
      x86/mm/64: Fix reboot interaction with CR4.PCIDE
      x86/mm: Flush more aggressively in lazy TLB mode

Borislav Petkov (1):
      x86/microcode: Do the family check first

Josh Poimboeuf (5):
      kprobes/x86: Set up frame pointer in kprobe trampoline
      x86/unwind: Fix dereference of untrusted pointer
      x86/unwind: Use MSB for frame pointer encoding on 32-bit
      x86/unwind: Align stack pointer in unwinder dump
      x86/unwind: Disable unwinder warnings on 32-bit

Len Brown (1):
      x86/apic: Update TSC_DEADLINE quirk with additional SKX stepping

Marcelo Henrique Cerri (1):
      x86/hyperv: Fix hypercalls with extended CPU ranges for TLB flushing

Masami Hiramatsu (1):
      kprobes/x86: Remove IRQ disabling from jprobe handlers

Mathias Krause (1):
      x86/alternatives: Fix alt_max_short macro to really be a max()

Paolo Bonzini (1):
      x86/apic: Silence "FW_BUG TSC_DEADLINE disabled due to Errata" on hypervisors

Tom Lendacky (1):
      x86/mm: Disable various instrumentations of mm/mem_encrypt.c and mm/tlb.c

Vitaly Kuznetsov (2):
      x86/hyperv: Clear vCPU banks between calls to avoid flushing unneeded vCPUs
      x86/hyperv: Don't use percpu areas for pcpu_flush/pcpu_flush_ex structures


 arch/x86/entry/entry_32.S              |   4 +-
 arch/x86/hyperv/hv_init.c              |   5 ++
 arch/x86/hyperv/mmu.c                  |  57 +++++++++---
 arch/x86/include/asm/alternative-asm.h |   4 +-
 arch/x86/include/asm/alternative.h     |   6 +-
 arch/x86/include/asm/mmu_context.h     |   8 +-
 arch/x86/include/asm/mshyperv.h        |   1 +
 arch/x86/include/asm/tlbflush.h        |  24 ++++++
 arch/x86/kernel/apic/apic.c            |  15 +++-
 arch/x86/kernel/cpu/microcode/core.c   |  27 ++++--
 arch/x86/kernel/kprobes/common.h       |  13 ++-
 arch/x86/kernel/kprobes/core.c         |   2 -
 arch/x86/kernel/reboot.c               |   4 +
 arch/x86/kernel/unwind_frame.c         |  38 +++++++-
 arch/x86/mm/Makefile                   |  11 ++-
 arch/x86/mm/tlb.c                      | 153 ++++++++++++++++++++++++---------
 16 files changed, 284 insertions(+), 88 deletions(-)

diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index 8a13d468635a..50e0d2bc4528 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -176,7 +176,7 @@
 /*
  * This is a sneaky trick to help the unwinder find pt_regs on the stack.  The
  * frame pointer is replaced with an encoded pointer to pt_regs.  The encoding
- * is just setting the LSB, which makes it an invalid stack address and is also
+ * is just clearing the MSB, which makes it an invalid stack address and is also
  * a signal to the unwinder that it's a pt_regs pointer in disguise.
  *
  * NOTE: This macro must be used *after* SAVE_ALL because it corrupts the
@@ -185,7 +185,7 @@
 .macro ENCODE_FRAME_POINTER
 #ifdef CONFIG_FRAME_POINTER
 	mov %esp, %ebp
-	orl $0x1, %ebp
+	andl $0x7fffffff, %ebp
 #endif
 .endm
 
diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
index 1a8eb550c40f..a5db63f728a2 100644
--- a/arch/x86/hyperv/hv_init.c
+++ b/arch/x86/hyperv/hv_init.c
@@ -85,6 +85,8 @@ EXPORT_SYMBOL_GPL(hyperv_cs);
 u32 *hv_vp_index;
 EXPORT_SYMBOL_GPL(hv_vp_index);
 
+u32 hv_max_vp_index;
+
 static int hv_cpu_init(unsigned int cpu)
 {
 	u64 msr_vp_index;
@@ -93,6 +95,9 @@ static int hv_cpu_init(unsigned int cpu)
 
 	hv_vp_index[smp_processor_id()] = msr_vp_index;
 
+	if (msr_vp_index > hv_max_vp_index)
+		hv_max_vp_index = msr_vp_index;
+
 	return 0;
 }
 
diff --git a/arch/x86/hyperv/mmu.c b/arch/x86/hyperv/mmu.c
index 39e7f6e50919..9cc9e1c1e2db 100644
--- a/arch/x86/hyperv/mmu.c
+++ b/arch/x86/hyperv/mmu.c
@@ -36,9 +36,9 @@ struct hv_flush_pcpu_ex {
 /* Each gva in gva_list encodes up to 4096 pages to flush */
 #define HV_TLB_FLUSH_UNIT (4096 * PAGE_SIZE)
 
-static struct hv_flush_pcpu __percpu *pcpu_flush;
+static struct hv_flush_pcpu __percpu **pcpu_flush;
 
-static struct hv_flush_pcpu_ex __percpu *pcpu_flush_ex;
+static struct hv_flush_pcpu_ex __percpu **pcpu_flush_ex;
 
 /*
  * Fills in gva_list starting from offset. Returns the number of items added.
@@ -76,6 +76,18 @@ static inline int cpumask_to_vp_set(struct hv_flush_pcpu_ex *flush,
 {
 	int cpu, vcpu, vcpu_bank, vcpu_offset, nr_bank = 1;
 
+	/* valid_bank_mask can represent up to 64 banks */
+	if (hv_max_vp_index / 64 >= 64)
+		return 0;
+
+	/*
+	 * Clear all banks up to the maximum possible bank as hv_flush_pcpu_ex
+	 * structs are not cleared between calls, we risk flushing unneeded
+	 * vCPUs otherwise.
+	 */
+	for (vcpu_bank = 0; vcpu_bank <= hv_max_vp_index / 64; vcpu_bank++)
+		flush->hv_vp_set.bank_contents[vcpu_bank] = 0;
+
 	/*
 	 * Some banks may end up being empty but this is acceptable.
 	 */
@@ -83,11 +95,6 @@ static inline int cpumask_to_vp_set(struct hv_flush_pcpu_ex *flush,
 		vcpu = hv_cpu_number_to_vp_number(cpu);
 		vcpu_bank = vcpu / 64;
 		vcpu_offset = vcpu % 64;
-
-		/* valid_bank_mask can represent up to 64 banks */
-		if (vcpu_bank >= 64)
-			return 0;
-
 		__set_bit(vcpu_offset, (unsigned long *)
 			  &flush->hv_vp_set.bank_contents[vcpu_bank]);
 		if (vcpu_bank >= nr_bank)
@@ -102,6 +109,7 @@ static void hyperv_flush_tlb_others(const struct cpumask *cpus,
 				    const struct flush_tlb_info *info)
 {
 	int cpu, vcpu, gva_n, max_gvas;
+	struct hv_flush_pcpu **flush_pcpu;
 	struct hv_flush_pcpu *flush;
 	u64 status = U64_MAX;
 	unsigned long flags;
@@ -116,7 +124,17 @@ static void hyperv_flush_tlb_others(const struct cpumask *cpus,
 
 	local_irq_save(flags);
 
-	flush = this_cpu_ptr(pcpu_flush);
+	flush_pcpu = this_cpu_ptr(pcpu_flush);
+
+	if (unlikely(!*flush_pcpu))
+		*flush_pcpu = page_address(alloc_page(GFP_ATOMIC));
+
+	flush = *flush_pcpu;
+
+	if (unlikely(!flush)) {
+		local_irq_restore(flags);
+		goto do_native;
+	}
 
 	if (info->mm) {
 		flush->address_space = virt_to_phys(info->mm->pgd);
@@ -173,6 +191,7 @@ static void hyperv_flush_tlb_others_ex(const struct cpumask *cpus,
 				       const struct flush_tlb_info *info)
 {
 	int nr_bank = 0, max_gvas, gva_n;
+	struct hv_flush_pcpu_ex **flush_pcpu;
 	struct hv_flush_pcpu_ex *flush;
 	u64 status = U64_MAX;
 	unsigned long flags;
@@ -187,7 +206,17 @@ static void hyperv_flush_tlb_others_ex(const struct cpumask *cpus,
 
 	local_irq_save(flags);
 
-	flush = this_cpu_ptr(pcpu_flush_ex);
+	flush_pcpu = this_cpu_ptr(pcpu_flush_ex);
+
+	if (unlikely(!*flush_pcpu))
+		*flush_pcpu = page_address(alloc_page(GFP_ATOMIC));
+
+	flush = *flush_pcpu;
+
+	if (unlikely(!flush)) {
+		local_irq_restore(flags);
+		goto do_native;
+	}
 
 	if (info->mm) {
 		flush->address_space = virt_to_phys(info->mm->pgd);
@@ -222,18 +251,18 @@ static void hyperv_flush_tlb_others_ex(const struct cpumask *cpus,
 		flush->flags |= HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY;
 		status = hv_do_rep_hypercall(
 			HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX,
-			0, nr_bank + 2, flush, NULL);
+			0, nr_bank, flush, NULL);
 	} else if (info->end &&
 		   ((info->end - info->start)/HV_TLB_FLUSH_UNIT) > max_gvas) {
 		status = hv_do_rep_hypercall(
 			HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX,
-			0, nr_bank + 2, flush, NULL);
+			0, nr_bank, flush, NULL);
 	} else {
 		gva_n = fill_gva_list(flush->gva_list, nr_bank,
 				      info->start, info->end);
 		status = hv_do_rep_hypercall(
 			HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX,
-			gva_n, nr_bank + 2, flush, NULL);
+			gva_n, nr_bank, flush, NULL);
 	}
 
 	local_irq_restore(flags);
@@ -266,7 +295,7 @@ void hyper_alloc_mmu(void)
 		return;
 
 	if (!(ms_hyperv.hints & HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED))
-		pcpu_flush = __alloc_percpu(PAGE_SIZE, PAGE_SIZE);
+		pcpu_flush = alloc_percpu(struct hv_flush_pcpu *);
 	else
-		pcpu_flush_ex = __alloc_percpu(PAGE_SIZE, PAGE_SIZE);
+		pcpu_flush_ex = alloc_percpu(struct hv_flush_pcpu_ex *);
 }
diff --git a/arch/x86/include/asm/alternative-asm.h b/arch/x86/include/asm/alternative-asm.h
index e7636bac7372..6c98821fef5e 100644
--- a/arch/x86/include/asm/alternative-asm.h
+++ b/arch/x86/include/asm/alternative-asm.h
@@ -62,8 +62,10 @@
 #define new_len2		145f-144f
 
 /*
- * max without conditionals. Idea adapted from:
+ * gas compatible max based on the idea from:
  * http://graphics.stanford.edu/~seander/bithacks.html#IntegerMinOrMax
+ *
+ * The additional "-" is needed because gas uses a "true" value of -1.
  */
 #define alt_max_short(a, b)	((a) ^ (((a) ^ (b)) & -(-((a) < (b)))))
 
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index c096624137ae..ccbe24e697c4 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -103,12 +103,12 @@ static inline int alternatives_text_reserved(void *start, void *end)
 	alt_end_marker ":\n"
 
 /*
- * max without conditionals. Idea adapted from:
+ * gas compatible max based on the idea from:
  * http://graphics.stanford.edu/~seander/bithacks.html#IntegerMinOrMax
  *
- * The additional "-" is needed because gas works with s32s.
+ * The additional "-" is needed because gas uses a "true" value of -1.
  */
-#define alt_max_short(a, b)	"((" a ") ^ (((" a ") ^ (" b ")) & -(-((" a ") - (" b ")))))"
+#define alt_max_short(a, b)	"((" a ") ^ (((" a ") ^ (" b ")) & -(-((" a ") < (" b ")))))"
 
 /*
  * Pad the second replacement alternative with additional NOPs if it is
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index c120b5db178a..3c856a15b98e 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -126,13 +126,7 @@ static inline void switch_ldt(struct mm_struct *prev, struct mm_struct *next)
 	DEBUG_LOCKS_WARN_ON(preemptible());
 }
 
-static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
-{
-	int cpu = smp_processor_id();
-
-	if (cpumask_test_cpu(cpu, mm_cpumask(mm)))
-		cpumask_clear_cpu(cpu, mm_cpumask(mm));
-}
+void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk);
 
 static inline int init_new_context(struct task_struct *tsk,
 				   struct mm_struct *mm)
diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
index 738503e1f80c..530f448fddaf 100644
--- a/arch/x86/include/asm/mshyperv.h
+++ b/arch/x86/include/asm/mshyperv.h
@@ -289,6 +289,7 @@ static inline u64 hv_do_rep_hypercall(u16 code, u16 rep_count, u16 varhead_size,
  * to this information.
  */
 extern u32 *hv_vp_index;
+extern u32 hv_max_vp_index;
 
 /**
  * hv_cpu_number_to_vp_number() - Map CPU to VP.
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index 4893abf7f74f..d362161d3291 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -83,6 +83,13 @@ static inline u64 inc_mm_tlb_gen(struct mm_struct *mm)
 #endif
 
 /*
+ * If tlb_use_lazy_mode is true, then we try to avoid switching CR3 to point
+ * to init_mm when we switch to a kernel thread (e.g. the idle thread).  If
+ * it's false, then we immediately switch CR3 when entering a kernel thread.
+ */
+DECLARE_STATIC_KEY_TRUE(tlb_use_lazy_mode);
+
+/*
  * 6 because 6 should be plenty and struct tlb_state will fit in
  * two cache lines.
  */
@@ -105,6 +112,23 @@ struct tlb_state {
 	u16 next_asid;
 
 	/*
+	 * We can be in one of several states:
+	 *
+	 *  - Actively using an mm.  Our CPU's bit will be set in
+	 *    mm_cpumask(loaded_mm) and is_lazy == false;
+	 *
+	 *  - Not using a real mm.  loaded_mm == &init_mm.  Our CPU's bit
+	 *    will not be set in mm_cpumask(&init_mm) and is_lazy == false.
+	 *
+	 *  - Lazily using a real mm.  loaded_mm != &init_mm, our bit
+	 *    is set in mm_cpumask(loaded_mm), but is_lazy == true.
+	 *    We're heuristically guessing that the CR3 load we
+	 *    skipped more than makes up for the overhead added by
+	 *    lazy mode.
+	 */
+	bool is_lazy;
+
+	/*
 	 * Access to this CR4 shadow and to H/W CR4 is protected by
 	 * disabling interrupts when modifying either one.
 	 */
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index d705c769f77d..ff891772c9f8 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -573,11 +573,21 @@ static u32 bdx_deadline_rev(void)
 	return ~0U;
 }
 
+static u32 skx_deadline_rev(void)
+{
+	switch (boot_cpu_data.x86_mask) {
+	case 0x03: return 0x01000136;
+	case 0x04: return 0x02000014;
+	}
+
+	return ~0U;
+}
+
 static const struct x86_cpu_id deadline_match[] = {
 	DEADLINE_MODEL_MATCH_FUNC( INTEL_FAM6_HASWELL_X,	hsx_deadline_rev),
 	DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_BROADWELL_X,	0x0b000020),
 	DEADLINE_MODEL_MATCH_FUNC( INTEL_FAM6_BROADWELL_XEON_D,	bdx_deadline_rev),
-	DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_SKYLAKE_X,	0x02000014),
+	DEADLINE_MODEL_MATCH_FUNC( INTEL_FAM6_SKYLAKE_X,	skx_deadline_rev),
 
 	DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_HASWELL_CORE,	0x22),
 	DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_HASWELL_ULT,	0x20),
@@ -600,7 +610,8 @@ static void apic_check_deadline_errata(void)
 	const struct x86_cpu_id *m;
 	u32 rev;
 
-	if (!boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER))
+	if (!boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER) ||
+	    boot_cpu_has(X86_FEATURE_HYPERVISOR))
 		return;
 
 	m = x86_match_cpu(deadline_match);
diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c
index 86e8f0b2537b..c4fa4a85d4cb 100644
--- a/arch/x86/kernel/cpu/microcode/core.c
+++ b/arch/x86/kernel/cpu/microcode/core.c
@@ -122,9 +122,6 @@ static bool __init check_loader_disabled_bsp(void)
 	bool *res = &dis_ucode_ldr;
 #endif
 
-	if (!have_cpuid_p())
-		return *res;
-
 	/*
 	 * CPUID(1).ECX[31]: reserved for hypervisor use. This is still not
 	 * completely accurate as xen pv guests don't see that CPUID bit set but
@@ -166,24 +163,36 @@ bool get_builtin_firmware(struct cpio_data *cd, const char *name)
 void __init load_ucode_bsp(void)
 {
 	unsigned int cpuid_1_eax;
+	bool intel = true;
 
-	if (check_loader_disabled_bsp())
+	if (!have_cpuid_p())
 		return;
 
 	cpuid_1_eax = native_cpuid_eax(1);
 
 	switch (x86_cpuid_vendor()) {
 	case X86_VENDOR_INTEL:
-		if (x86_family(cpuid_1_eax) >= 6)
-			load_ucode_intel_bsp();
+		if (x86_family(cpuid_1_eax) < 6)
+			return;
 		break;
+
 	case X86_VENDOR_AMD:
-		if (x86_family(cpuid_1_eax) >= 0x10)
-			load_ucode_amd_bsp(cpuid_1_eax);
+		if (x86_family(cpuid_1_eax) < 0x10)
+			return;
+		intel = false;
 		break;
+
 	default:
-		break;
+		return;
 	}
+
+	if (check_loader_disabled_bsp())
+		return;
+
+	if (intel)
+		load_ucode_intel_bsp();
+	else
+		load_ucode_amd_bsp(cpuid_1_eax);
 }
 
 static bool check_loader_disabled_ap(void)
diff --git a/arch/x86/kernel/kprobes/common.h b/arch/x86/kernel/kprobes/common.h
index db2182d63ed0..3fc0f9a794cb 100644
--- a/arch/x86/kernel/kprobes/common.h
+++ b/arch/x86/kernel/kprobes/common.h
@@ -3,6 +3,15 @@
 
 /* Kprobes and Optprobes common header */
 
+#include <asm/asm.h>
+
+#ifdef CONFIG_FRAME_POINTER
+# define SAVE_RBP_STRING "	push %" _ASM_BP "\n" \
+			 "	mov  %" _ASM_SP ", %" _ASM_BP "\n"
+#else
+# define SAVE_RBP_STRING "	push %" _ASM_BP "\n"
+#endif
+
 #ifdef CONFIG_X86_64
 #define SAVE_REGS_STRING			\
 	/* Skip cs, ip, orig_ax. */		\
@@ -17,7 +26,7 @@
 	"	pushq %r10\n"			\
 	"	pushq %r11\n"			\
 	"	pushq %rbx\n"			\
-	"	pushq %rbp\n"			\
+	SAVE_RBP_STRING				\
 	"	pushq %r12\n"			\
 	"	pushq %r13\n"			\
 	"	pushq %r14\n"			\
@@ -48,7 +57,7 @@
 	"	pushl %es\n"			\
 	"	pushl %ds\n"			\
 	"	pushl %eax\n"			\
-	"	pushl %ebp\n"			\
+	SAVE_RBP_STRING				\
 	"	pushl %edi\n"			\
 	"	pushl %esi\n"			\
 	"	pushl %edx\n"			\
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
index f0153714ddac..0742491cbb73 100644
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -1080,8 +1080,6 @@ int setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
 	 * raw stack chunk with redzones:
 	 */
 	__memcpy(kcb->jprobes_stack, (kprobe_opcode_t *)addr, MIN_STACK_SIZE(addr));
-	regs->flags &= ~X86_EFLAGS_IF;
-	trace_hardirqs_off();
 	regs->ip = (unsigned long)(jp->entry);
 
 	/*
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 54180fa6f66f..add33f600531 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -105,6 +105,10 @@ void __noreturn machine_real_restart(unsigned int type)
 	load_cr3(initial_page_table);
 #else
 	write_cr3(real_mode_header->trampoline_pgd);
+
+	/* Exiting long mode will fail if CR4.PCIDE is set. */
+	if (static_cpu_has(X86_FEATURE_PCID))
+		cr4_clear_bits(X86_CR4_PCIDE);
 #endif
 
 	/* Jump to the identity-mapped low memory code */
diff --git a/arch/x86/kernel/unwind_frame.c b/arch/x86/kernel/unwind_frame.c
index d145a0b1f529..3dc26f95d46e 100644
--- a/arch/x86/kernel/unwind_frame.c
+++ b/arch/x86/kernel/unwind_frame.c
@@ -44,7 +44,8 @@ static void unwind_dump(struct unwind_state *state)
 			state->stack_info.type, state->stack_info.next_sp,
 			state->stack_mask, state->graph_idx);
 
-	for (sp = state->orig_sp; sp; sp = PTR_ALIGN(stack_info.next_sp, sizeof(long))) {
+	for (sp = PTR_ALIGN(state->orig_sp, sizeof(long)); sp;
+	     sp = PTR_ALIGN(stack_info.next_sp, sizeof(long))) {
 		if (get_stack_info(sp, state->task, &stack_info, &visit_mask))
 			break;
 
@@ -174,6 +175,7 @@ static bool is_last_task_frame(struct unwind_state *state)
  * This determines if the frame pointer actually contains an encoded pointer to
  * pt_regs on the stack.  See ENCODE_FRAME_POINTER.
  */
+#ifdef CONFIG_X86_64
 static struct pt_regs *decode_frame_pointer(unsigned long *bp)
 {
 	unsigned long regs = (unsigned long)bp;
@@ -183,6 +185,23 @@ static struct pt_regs *decode_frame_pointer(unsigned long *bp)
 
 	return (struct pt_regs *)(regs & ~0x1);
 }
+#else
+static struct pt_regs *decode_frame_pointer(unsigned long *bp)
+{
+	unsigned long regs = (unsigned long)bp;
+
+	if (regs & 0x80000000)
+		return NULL;
+
+	return (struct pt_regs *)(regs | 0x80000000);
+}
+#endif
+
+#ifdef CONFIG_X86_32
+#define KERNEL_REGS_SIZE (sizeof(struct pt_regs) - 2*sizeof(long))
+#else
+#define KERNEL_REGS_SIZE (sizeof(struct pt_regs))
+#endif
 
 static bool update_stack_state(struct unwind_state *state,
 			       unsigned long *next_bp)
@@ -202,7 +221,7 @@ static bool update_stack_state(struct unwind_state *state,
 	regs = decode_frame_pointer(next_bp);
 	if (regs) {
 		frame = (unsigned long *)regs;
-		len = regs_size(regs);
+		len = KERNEL_REGS_SIZE;
 		state->got_irq = true;
 	} else {
 		frame = next_bp;
@@ -226,6 +245,14 @@ static bool update_stack_state(struct unwind_state *state,
 	    frame < prev_frame_end)
 		return false;
 
+	/*
+	 * On 32-bit with user mode regs, make sure the last two regs are safe
+	 * to access:
+	 */
+	if (IS_ENABLED(CONFIG_X86_32) && regs && user_mode(regs) &&
+	    !on_stack(info, frame, len + 2*sizeof(long)))
+		return false;
+
 	/* Move state to the next frame: */
 	if (regs) {
 		state->regs = regs;
@@ -328,6 +355,13 @@ bool unwind_next_frame(struct unwind_state *state)
 	    state->regs->sp < (unsigned long)task_pt_regs(state->task))
 		goto the_end;
 
+	/*
+	 * There are some known frame pointer issues on 32-bit.  Disable
+	 * unwinder warnings on 32-bit until it gets objtool support.
+	 */
+	if (IS_ENABLED(CONFIG_X86_32))
+		goto the_end;
+
 	if (state->regs) {
 		printk_deferred_once(KERN_WARNING
 			"WARNING: kernel stack regs at %p in %s:%d has bad 'bp' value %p\n",
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index 72bf8c01c6e3..e1f095884386 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -1,5 +1,12 @@
-# Kernel does not boot with instrumentation of tlb.c.
-KCOV_INSTRUMENT_tlb.o	:= n
+# Kernel does not boot with instrumentation of tlb.c and mem_encrypt.c
+KCOV_INSTRUMENT_tlb.o		:= n
+KCOV_INSTRUMENT_mem_encrypt.o	:= n
+
+KASAN_SANITIZE_mem_encrypt.o	:= n
+
+ifdef CONFIG_FUNCTION_TRACER
+CFLAGS_REMOVE_mem_encrypt.o	= -pg
+endif
 
 obj-y	:=  init.o init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \
 	    pat.o pgtable.o physaddr.o setup_nx.o tlb.o
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 49d9778376d7..658bf0090565 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -30,6 +30,8 @@
 
 atomic64_t last_mm_ctx_id = ATOMIC64_INIT(1);
 
+DEFINE_STATIC_KEY_TRUE(tlb_use_lazy_mode);
+
 static void choose_new_asid(struct mm_struct *next, u64 next_tlb_gen,
 			    u16 *new_asid, bool *need_flush)
 {
@@ -80,7 +82,7 @@ void leave_mm(int cpu)
 		return;
 
 	/* Warn if we're not lazy. */
-	WARN_ON(cpumask_test_cpu(smp_processor_id(), mm_cpumask(loaded_mm)));
+	WARN_ON(!this_cpu_read(cpu_tlbstate.is_lazy));
 
 	switch_mm(NULL, &init_mm, NULL);
 }
@@ -142,45 +144,24 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
 		__flush_tlb_all();
 	}
 #endif
+	this_cpu_write(cpu_tlbstate.is_lazy, false);
 
 	if (real_prev == next) {
 		VM_BUG_ON(this_cpu_read(cpu_tlbstate.ctxs[prev_asid].ctx_id) !=
 			  next->context.ctx_id);
 
-		if (cpumask_test_cpu(cpu, mm_cpumask(next))) {
-			/*
-			 * There's nothing to do: we weren't lazy, and we
-			 * aren't changing our mm.  We don't need to flush
-			 * anything, nor do we need to update CR3, CR4, or
-			 * LDTR.
-			 */
-			return;
-		}
-
-		/* Resume remote flushes and then read tlb_gen. */
-		cpumask_set_cpu(cpu, mm_cpumask(next));
-		next_tlb_gen = atomic64_read(&next->context.tlb_gen);
-
-		if (this_cpu_read(cpu_tlbstate.ctxs[prev_asid].tlb_gen) <
-		    next_tlb_gen) {
-			/*
-			 * Ideally, we'd have a flush_tlb() variant that
-			 * takes the known CR3 value as input.  This would
-			 * be faster on Xen PV and on hypothetical CPUs
-			 * on which INVPCID is fast.
-			 */
-			this_cpu_write(cpu_tlbstate.ctxs[prev_asid].tlb_gen,
-				       next_tlb_gen);
-			write_cr3(build_cr3(next, prev_asid));
-			trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH,
-					TLB_FLUSH_ALL);
-		}
-
 		/*
-		 * We just exited lazy mode, which means that CR4 and/or LDTR
-		 * may be stale.  (Changes to the required CR4 and LDTR states
-		 * are not reflected in tlb_gen.)
+		 * We don't currently support having a real mm loaded without
+		 * our cpu set in mm_cpumask().  We have all the bookkeeping
+		 * in place to figure out whether we would need to flush
+		 * if our cpu were cleared in mm_cpumask(), but we don't
+		 * currently use it.
 		 */
+		if (WARN_ON_ONCE(real_prev != &init_mm &&
+				 !cpumask_test_cpu(cpu, mm_cpumask(next))))
+			cpumask_set_cpu(cpu, mm_cpumask(next));
+
+		return;
 	} else {
 		u16 new_asid;
 		bool need_flush;
@@ -199,10 +180,9 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
 		}
 
 		/* Stop remote flushes for the previous mm */
-		if (cpumask_test_cpu(cpu, mm_cpumask(real_prev)))
-			cpumask_clear_cpu(cpu, mm_cpumask(real_prev));
-
-		VM_WARN_ON_ONCE(cpumask_test_cpu(cpu, mm_cpumask(next)));
+		VM_WARN_ON_ONCE(!cpumask_test_cpu(cpu, mm_cpumask(real_prev)) &&
+				real_prev != &init_mm);
+		cpumask_clear_cpu(cpu, mm_cpumask(real_prev));
 
 		/*
 		 * Start remote flushes and then read tlb_gen.
@@ -233,6 +213,37 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
 }
 
 /*
+ * enter_lazy_tlb() is a hint from the scheduler that we are entering a
+ * kernel thread or other context without an mm.  Acceptable implementations
+ * include doing nothing whatsoever, switching to init_mm, or various clever
+ * lazy tricks to try to minimize TLB flushes.
+ *
+ * The scheduler reserves the right to call enter_lazy_tlb() several times
+ * in a row.  It will notify us that we're going back to a real mm by
+ * calling switch_mm_irqs_off().
+ */
+void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
+{
+	if (this_cpu_read(cpu_tlbstate.loaded_mm) == &init_mm)
+		return;
+
+	if (static_branch_unlikely(&tlb_use_lazy_mode)) {
+		/*
+		 * There's a significant optimization that may be possible
+		 * here.  We have accurate enough TLB flush tracking that we
+		 * don't need to maintain coherence of TLB per se when we're
+		 * lazy.  We do, however, need to maintain coherence of
+		 * paging-structure caches.  We could, in principle, leave our
+		 * old mm loaded and only switch to init_mm when
+		 * tlb_remove_page() happens.
+		 */
+		this_cpu_write(cpu_tlbstate.is_lazy, true);
+	} else {
+		switch_mm(NULL, &init_mm, NULL);
+	}
+}
+
+/*
  * Call this when reinitializing a CPU.  It fixes the following potential
  * problems:
  *
@@ -303,16 +314,20 @@ static void flush_tlb_func_common(const struct flush_tlb_info *f,
 	/* This code cannot presently handle being reentered. */
 	VM_WARN_ON(!irqs_disabled());
 
+	if (unlikely(loaded_mm == &init_mm))
+		return;
+
 	VM_WARN_ON(this_cpu_read(cpu_tlbstate.ctxs[loaded_mm_asid].ctx_id) !=
 		   loaded_mm->context.ctx_id);
 
-	if (!cpumask_test_cpu(smp_processor_id(), mm_cpumask(loaded_mm))) {
+	if (this_cpu_read(cpu_tlbstate.is_lazy)) {
 		/*
-		 * We're in lazy mode -- don't flush.  We can get here on
-		 * remote flushes due to races and on local flushes if a
-		 * kernel thread coincidentally flushes the mm it's lazily
-		 * still using.
+		 * We're in lazy mode.  We need to at least flush our
+		 * paging-structure cache to avoid speculatively reading
+		 * garbage into our TLB.  Since switching to init_mm is barely
+		 * slower than a minimal flush, just switch to init_mm.
 		 */
+		switch_mm_irqs_off(NULL, &init_mm, NULL);
 		return;
 	}
 
@@ -611,3 +626,57 @@ static int __init create_tlb_single_page_flush_ceiling(void)
 	return 0;
 }
 late_initcall(create_tlb_single_page_flush_ceiling);
+
+static ssize_t tlblazy_read_file(struct file *file, char __user *user_buf,
+				 size_t count, loff_t *ppos)
+{
+	char buf[2];
+
+	buf[0] = static_branch_likely(&tlb_use_lazy_mode) ? '1' : '0';
+	buf[1] = '\n';
+
+	return simple_read_from_buffer(user_buf, count, ppos, buf, 2);
+}
+
+static ssize_t tlblazy_write_file(struct file *file,
+		 const char __user *user_buf, size_t count, loff_t *ppos)
+{
+	bool val;
+
+	if (kstrtobool_from_user(user_buf, count, &val))
+		return -EINVAL;
+
+	if (val)
+		static_branch_enable(&tlb_use_lazy_mode);
+	else
+		static_branch_disable(&tlb_use_lazy_mode);
+
+	return count;
+}
+
+static const struct file_operations fops_tlblazy = {
+	.read = tlblazy_read_file,
+	.write = tlblazy_write_file,
+	.llseek = default_llseek,
+};
+
+static int __init init_tlb_use_lazy_mode(void)
+{
+	if (boot_cpu_has(X86_FEATURE_PCID)) {
+		/*
+		 * Heuristic: with PCID on, switching to and from
+		 * init_mm is reasonably fast, but remote flush IPIs
+		 * as expensive as ever, so turn off lazy TLB mode.
+		 *
+		 * We can't do this in setup_pcid() because static keys
+		 * haven't been initialized yet, and it would blow up
+		 * badly.
+		 */
+		static_branch_disable(&tlb_use_lazy_mode);
+	}
+
+	debugfs_create_file("tlb_use_lazy_mode", S_IRUSR | S_IWUSR,
+			    arch_debugfs_dir, NULL, &fops_tlblazy);
+	return 0;
+}
+late_initcall(init_tlb_use_lazy_mode);

^ permalink raw reply	[flat|nested] 539+ messages in thread

* [GIT PULL] x86 fixes
@ 2017-09-24 11:28 Ingo Molnar
  0 siblings, 0 replies; 539+ messages in thread
From: Ingo Molnar @ 2017-09-24 11:28 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Thomas Gleixner, H. Peter Anvin, Peter Zijlstra,
	Andrew Morton, Andy Lutomirski

Linus,

Please pull the latest x86-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus

   # HEAD: f5caf621ee357279e759c0911daf6d55c7d36f03 x86/asm: Fix inline asm call constraints for Clang

Another round of CR3/PCID related fixes (I think this addresses all but one of the 
known problems with PCID support), an objtool fix plus a Clang fix that (finally) 
solves all Clang quirks to build a bootable x86 kernel as-is.


  out-of-topic modifications in x86-urgent-for-linus:
  -----------------------------------------------------
  tools/objtool/check.c              # 0d0970eef3b0: objtool: Handle another GCC 

 Thanks,

	Ingo

------------------>
Andy Lutomirski (4):
      x86/mm: Factor out CR3-building code
      x86/mm/64: Stop using CR3.PCID == 0 in ASID-aware code
      x86/mm/32: Move setup_clear_cpu_cap(X86_FEATURE_PCID) earlier
      x86/mm/32: Load a sane CR3 before cpu_init() on secondary CPUs

Josh Poimboeuf (2):
      objtool: Handle another GCC stack pointer adjustment bug
      x86/asm: Fix inline asm call constraints for Clang


 arch/x86/include/asm/alternative.h               |  3 +-
 arch/x86/include/asm/asm.h                       | 11 ++++++
 arch/x86/include/asm/mmu_context.h               | 32 +++++++++++++++---
 arch/x86/include/asm/mshyperv.h                  | 10 +++---
 arch/x86/include/asm/paravirt_types.h            | 14 ++++----
 arch/x86/include/asm/preempt.h                   | 15 +++------
 arch/x86/include/asm/processor.h                 |  6 ++--
 arch/x86/include/asm/rwsem.h                     |  4 +--
 arch/x86/include/asm/uaccess.h                   |  4 +--
 arch/x86/include/asm/xen/hypercall.h             |  5 ++-
 arch/x86/kernel/cpu/bugs.c                       |  8 -----
 arch/x86/kernel/cpu/common.c                     |  8 +++++
 arch/x86/kernel/smpboot.c                        | 13 +++----
 arch/x86/kvm/emulate.c                           |  3 +-
 arch/x86/kvm/vmx.c                               |  3 +-
 arch/x86/mm/fault.c                              |  3 +-
 arch/x86/mm/tlb.c                                | 11 +++---
 tools/objtool/Documentation/stack-validation.txt |  6 ++--
 tools/objtool/arch/x86/decode.c                  |  6 ++--
 tools/objtool/check.c                            | 43 ++++++++++++++++--------
 20 files changed, 122 insertions(+), 86 deletions(-)

diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index 1b020381ab38..c096624137ae 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -218,10 +218,9 @@ static inline int alternatives_text_reserved(void *start, void *end)
 #define alternative_call_2(oldfunc, newfunc1, feature1, newfunc2, feature2,   \
 			   output, input...)				      \
 {									      \
-	register void *__sp asm(_ASM_SP);				      \
 	asm volatile (ALTERNATIVE_2("call %P[old]", "call %P[new1]", feature1,\
 		"call %P[new2]", feature2)				      \
-		: output, "+r" (__sp)					      \
+		: output, ASM_CALL_CONSTRAINT				      \
 		: [old] "i" (oldfunc), [new1] "i" (newfunc1),		      \
 		  [new2] "i" (newfunc2), ## input);			      \
 }
diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h
index 676ee5807d86..c1eadbaf1115 100644
--- a/arch/x86/include/asm/asm.h
+++ b/arch/x86/include/asm/asm.h
@@ -132,4 +132,15 @@
 /* For C file, we already have NOKPROBE_SYMBOL macro */
 #endif
 
+#ifndef __ASSEMBLY__
+/*
+ * This output constraint should be used for any inline asm which has a "call"
+ * instruction.  Otherwise the asm may be inserted before the frame pointer
+ * gets set up by the containing function.  If you forget to do this, objtool
+ * may print a "call without frame pointer save/setup" warning.
+ */
+register unsigned int __asm_call_sp asm("esp");
+#define ASM_CALL_CONSTRAINT "+r" (__asm_call_sp)
+#endif
+
 #endif /* _ASM_X86_ASM_H */
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index 7ae318c340d9..c120b5db178a 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -286,6 +286,32 @@ static inline bool arch_vma_access_permitted(struct vm_area_struct *vma,
 	return __pkru_allows_pkey(vma_pkey(vma), write);
 }
 
+/*
+ * If PCID is on, ASID-aware code paths put the ASID+1 into the PCID
+ * bits.  This serves two purposes.  It prevents a nasty situation in
+ * which PCID-unaware code saves CR3, loads some other value (with PCID
+ * == 0), and then restores CR3, thus corrupting the TLB for ASID 0 if
+ * the saved ASID was nonzero.  It also means that any bugs involving
+ * loading a PCID-enabled CR3 with CR4.PCIDE off will trigger
+ * deterministically.
+ */
+
+static inline unsigned long build_cr3(struct mm_struct *mm, u16 asid)
+{
+	if (static_cpu_has(X86_FEATURE_PCID)) {
+		VM_WARN_ON_ONCE(asid > 4094);
+		return __sme_pa(mm->pgd) | (asid + 1);
+	} else {
+		VM_WARN_ON_ONCE(asid != 0);
+		return __sme_pa(mm->pgd);
+	}
+}
+
+static inline unsigned long build_cr3_noflush(struct mm_struct *mm, u16 asid)
+{
+	VM_WARN_ON_ONCE(asid > 4094);
+	return __sme_pa(mm->pgd) | (asid + 1) | CR3_NOFLUSH;
+}
 
 /*
  * This can be used from process context to figure out what the value of
@@ -296,10 +322,8 @@ static inline bool arch_vma_access_permitted(struct vm_area_struct *vma,
  */
 static inline unsigned long __get_current_cr3_fast(void)
 {
-	unsigned long cr3 = __pa(this_cpu_read(cpu_tlbstate.loaded_mm)->pgd);
-
-	if (static_cpu_has(X86_FEATURE_PCID))
-		cr3 |= this_cpu_read(cpu_tlbstate.loaded_mm_asid);
+	unsigned long cr3 = build_cr3(this_cpu_read(cpu_tlbstate.loaded_mm),
+		this_cpu_read(cpu_tlbstate.loaded_mm_asid));
 
 	/* For now, be very restrictive about when this can be called. */
 	VM_WARN_ON(in_nmi() || preemptible());
diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
index 63cc96f064dc..738503e1f80c 100644
--- a/arch/x86/include/asm/mshyperv.h
+++ b/arch/x86/include/asm/mshyperv.h
@@ -179,7 +179,6 @@ static inline u64 hv_do_hypercall(u64 control, void *input, void *output)
 	u64 input_address = input ? virt_to_phys(input) : 0;
 	u64 output_address = output ? virt_to_phys(output) : 0;
 	u64 hv_status;
-	register void *__sp asm(_ASM_SP);
 
 #ifdef CONFIG_X86_64
 	if (!hv_hypercall_pg)
@@ -187,7 +186,7 @@ static inline u64 hv_do_hypercall(u64 control, void *input, void *output)
 
 	__asm__ __volatile__("mov %4, %%r8\n"
 			     "call *%5"
-			     : "=a" (hv_status), "+r" (__sp),
+			     : "=a" (hv_status), ASM_CALL_CONSTRAINT,
 			       "+c" (control), "+d" (input_address)
 			     :  "r" (output_address), "m" (hv_hypercall_pg)
 			     : "cc", "memory", "r8", "r9", "r10", "r11");
@@ -202,7 +201,7 @@ static inline u64 hv_do_hypercall(u64 control, void *input, void *output)
 
 	__asm__ __volatile__("call *%7"
 			     : "=A" (hv_status),
-			       "+c" (input_address_lo), "+r" (__sp)
+			       "+c" (input_address_lo), ASM_CALL_CONSTRAINT
 			     : "A" (control),
 			       "b" (input_address_hi),
 			       "D"(output_address_hi), "S"(output_address_lo),
@@ -224,12 +223,11 @@ static inline u64 hv_do_hypercall(u64 control, void *input, void *output)
 static inline u64 hv_do_fast_hypercall8(u16 code, u64 input1)
 {
 	u64 hv_status, control = (u64)code | HV_HYPERCALL_FAST_BIT;
-	register void *__sp asm(_ASM_SP);
 
 #ifdef CONFIG_X86_64
 	{
 		__asm__ __volatile__("call *%4"
-				     : "=a" (hv_status), "+r" (__sp),
+				     : "=a" (hv_status), ASM_CALL_CONSTRAINT,
 				       "+c" (control), "+d" (input1)
 				     : "m" (hv_hypercall_pg)
 				     : "cc", "r8", "r9", "r10", "r11");
@@ -242,7 +240,7 @@ static inline u64 hv_do_fast_hypercall8(u16 code, u64 input1)
 		__asm__ __volatile__ ("call *%5"
 				      : "=A"(hv_status),
 					"+c"(input1_lo),
-					"+r"(__sp)
+					ASM_CALL_CONSTRAINT
 				      :	"A" (control),
 					"b" (input1_hi),
 					"m" (hv_hypercall_pg)
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index 42873edd9f9d..280d94c36dad 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -459,8 +459,8 @@ int paravirt_disable_iospace(void);
  */
 #ifdef CONFIG_X86_32
 #define PVOP_VCALL_ARGS							\
-	unsigned long __eax = __eax, __edx = __edx, __ecx = __ecx;	\
-	register void *__sp asm("esp")
+	unsigned long __eax = __eax, __edx = __edx, __ecx = __ecx;
+
 #define PVOP_CALL_ARGS			PVOP_VCALL_ARGS
 
 #define PVOP_CALL_ARG1(x)		"a" ((unsigned long)(x))
@@ -480,8 +480,8 @@ int paravirt_disable_iospace(void);
 /* [re]ax isn't an arg, but the return val */
 #define PVOP_VCALL_ARGS						\
 	unsigned long __edi = __edi, __esi = __esi,		\
-		__edx = __edx, __ecx = __ecx, __eax = __eax;	\
-	register void *__sp asm("rsp")
+		__edx = __edx, __ecx = __ecx, __eax = __eax;
+
 #define PVOP_CALL_ARGS		PVOP_VCALL_ARGS
 
 #define PVOP_CALL_ARG1(x)		"D" ((unsigned long)(x))
@@ -532,7 +532,7 @@ int paravirt_disable_iospace(void);
 			asm volatile(pre				\
 				     paravirt_alt(PARAVIRT_CALL)	\
 				     post				\
-				     : call_clbr, "+r" (__sp)		\
+				     : call_clbr, ASM_CALL_CONSTRAINT	\
 				     : paravirt_type(op),		\
 				       paravirt_clobber(clbr),		\
 				       ##__VA_ARGS__			\
@@ -542,7 +542,7 @@ int paravirt_disable_iospace(void);
 			asm volatile(pre				\
 				     paravirt_alt(PARAVIRT_CALL)	\
 				     post				\
-				     : call_clbr, "+r" (__sp)		\
+				     : call_clbr, ASM_CALL_CONSTRAINT	\
 				     : paravirt_type(op),		\
 				       paravirt_clobber(clbr),		\
 				       ##__VA_ARGS__			\
@@ -569,7 +569,7 @@ int paravirt_disable_iospace(void);
 		asm volatile(pre					\
 			     paravirt_alt(PARAVIRT_CALL)		\
 			     post					\
-			     : call_clbr, "+r" (__sp)			\
+			     : call_clbr, ASM_CALL_CONSTRAINT		\
 			     : paravirt_type(op),			\
 			       paravirt_clobber(clbr),			\
 			       ##__VA_ARGS__				\
diff --git a/arch/x86/include/asm/preempt.h b/arch/x86/include/asm/preempt.h
index ec1f3c651150..4f44505dbf87 100644
--- a/arch/x86/include/asm/preempt.h
+++ b/arch/x86/include/asm/preempt.h
@@ -100,19 +100,14 @@ static __always_inline bool should_resched(int preempt_offset)
 
 #ifdef CONFIG_PREEMPT
   extern asmlinkage void ___preempt_schedule(void);
-# define __preempt_schedule()					\
-({								\
-	register void *__sp asm(_ASM_SP);			\
-	asm volatile ("call ___preempt_schedule" : "+r"(__sp));	\
-})
+# define __preempt_schedule() \
+	asm volatile ("call ___preempt_schedule" : ASM_CALL_CONSTRAINT)
 
   extern asmlinkage void preempt_schedule(void);
   extern asmlinkage void ___preempt_schedule_notrace(void);
-# define __preempt_schedule_notrace()					\
-({									\
-	register void *__sp asm(_ASM_SP);				\
-	asm volatile ("call ___preempt_schedule_notrace" : "+r"(__sp));	\
-})
+# define __preempt_schedule_notrace() \
+	asm volatile ("call ___preempt_schedule_notrace" : ASM_CALL_CONSTRAINT)
+
   extern asmlinkage void preempt_schedule_notrace(void);
 #endif
 
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 3fa26a61eabc..b390ff76e58f 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -677,8 +677,6 @@ static inline void sync_core(void)
 	 * Like all of Linux's memory ordering operations, this is a
 	 * compiler barrier as well.
 	 */
-	register void *__sp asm(_ASM_SP);
-
 #ifdef CONFIG_X86_32
 	asm volatile (
 		"pushfl\n\t"
@@ -686,7 +684,7 @@ static inline void sync_core(void)
 		"pushl $1f\n\t"
 		"iret\n\t"
 		"1:"
-		: "+r" (__sp) : : "memory");
+		: ASM_CALL_CONSTRAINT : : "memory");
 #else
 	unsigned int tmp;
 
@@ -703,7 +701,7 @@ static inline void sync_core(void)
 		"iretq\n\t"
 		UNWIND_HINT_RESTORE
 		"1:"
-		: "=&r" (tmp), "+r" (__sp) : : "cc", "memory");
+		: "=&r" (tmp), ASM_CALL_CONSTRAINT : : "cc", "memory");
 #endif
 }
 
diff --git a/arch/x86/include/asm/rwsem.h b/arch/x86/include/asm/rwsem.h
index a34e0d4b957d..7116b7931c7b 100644
--- a/arch/x86/include/asm/rwsem.h
+++ b/arch/x86/include/asm/rwsem.h
@@ -103,7 +103,6 @@ static inline bool __down_read_trylock(struct rw_semaphore *sem)
 ({							\
 	long tmp;					\
 	struct rw_semaphore* ret;			\
-	register void *__sp asm(_ASM_SP);		\
 							\
 	asm volatile("# beginning down_write\n\t"	\
 		     LOCK_PREFIX "  xadd      %1,(%4)\n\t"	\
@@ -114,7 +113,8 @@ static inline bool __down_read_trylock(struct rw_semaphore *sem)
 		     "  call " slow_path "\n"		\
 		     "1:\n"				\
 		     "# ending down_write"		\
-		     : "+m" (sem->count), "=d" (tmp), "=a" (ret), "+r" (__sp) \
+		     : "+m" (sem->count), "=d" (tmp),	\
+		       "=a" (ret), ASM_CALL_CONSTRAINT	\
 		     : "a" (sem), "1" (RWSEM_ACTIVE_WRITE_BIAS) \
 		     : "memory", "cc");			\
 	ret;						\
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index 184eb9894dae..78e8fcc87d4c 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -166,11 +166,11 @@ __typeof__(__builtin_choose_expr(sizeof(x) > sizeof(0UL), 0ULL, 0UL))
 ({									\
 	int __ret_gu;							\
 	register __inttype(*(ptr)) __val_gu asm("%"_ASM_DX);		\
-	register void *__sp asm(_ASM_SP);				\
 	__chk_user_ptr(ptr);						\
 	might_fault();							\
 	asm volatile("call __get_user_%P4"				\
-		     : "=a" (__ret_gu), "=r" (__val_gu), "+r" (__sp)	\
+		     : "=a" (__ret_gu), "=r" (__val_gu),		\
+			ASM_CALL_CONSTRAINT				\
 		     : "0" (ptr), "i" (sizeof(*(ptr))));		\
 	(x) = (__force __typeof__(*(ptr))) __val_gu;			\
 	__builtin_expect(__ret_gu, 0);					\
diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h
index 9606688caa4b..128a1a0b1450 100644
--- a/arch/x86/include/asm/xen/hypercall.h
+++ b/arch/x86/include/asm/xen/hypercall.h
@@ -113,10 +113,9 @@ extern struct { char _entry[32]; } hypercall_page[];
 	register unsigned long __arg2 asm(__HYPERCALL_ARG2REG) = __arg2; \
 	register unsigned long __arg3 asm(__HYPERCALL_ARG3REG) = __arg3; \
 	register unsigned long __arg4 asm(__HYPERCALL_ARG4REG) = __arg4; \
-	register unsigned long __arg5 asm(__HYPERCALL_ARG5REG) = __arg5; \
-	register void *__sp asm(_ASM_SP);
+	register unsigned long __arg5 asm(__HYPERCALL_ARG5REG) = __arg5;
 
-#define __HYPERCALL_0PARAM	"=r" (__res), "+r" (__sp)
+#define __HYPERCALL_0PARAM	"=r" (__res), ASM_CALL_CONSTRAINT
 #define __HYPERCALL_1PARAM	__HYPERCALL_0PARAM, "+r" (__arg1)
 #define __HYPERCALL_2PARAM	__HYPERCALL_1PARAM, "+r" (__arg2)
 #define __HYPERCALL_3PARAM	__HYPERCALL_2PARAM, "+r" (__arg3)
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index db684880d74a..0af86d9242da 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -21,14 +21,6 @@
 
 void __init check_bugs(void)
 {
-#ifdef CONFIG_X86_32
-	/*
-	 * Regardless of whether PCID is enumerated, the SDM says
-	 * that it can't be enabled in 32-bit mode.
-	 */
-	setup_clear_cpu_cap(X86_FEATURE_PCID);
-#endif
-
 	identify_boot_cpu();
 
 	if (!IS_ENABLED(CONFIG_SMP)) {
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 775f10100d7f..c9176bae7fd8 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -904,6 +904,14 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
 
 	setup_force_cpu_cap(X86_FEATURE_ALWAYS);
 	fpu__init_system(c);
+
+#ifdef CONFIG_X86_32
+	/*
+	 * Regardless of whether PCID is enumerated, the SDM says
+	 * that it can't be enabled in 32-bit mode.
+	 */
+	setup_clear_cpu_cap(X86_FEATURE_PCID);
+#endif
 }
 
 void __init early_cpu_init(void)
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 0854ff169274..ad59edd84de7 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -232,12 +232,6 @@ static void notrace start_secondary(void *unused)
 	 */
 	if (boot_cpu_has(X86_FEATURE_PCID))
 		__write_cr4(__read_cr4() | X86_CR4_PCIDE);
-	cpu_init();
-	x86_cpuinit.early_percpu_clock_init();
-	preempt_disable();
-	smp_callin();
-
-	enable_start_cpu0 = 0;
 
 #ifdef CONFIG_X86_32
 	/* switch away from the initial page table */
@@ -245,6 +239,13 @@ static void notrace start_secondary(void *unused)
 	__flush_tlb_all();
 #endif
 
+	cpu_init();
+	x86_cpuinit.early_percpu_clock_init();
+	preempt_disable();
+	smp_callin();
+
+	enable_start_cpu0 = 0;
+
 	/* otherwise gcc will move up smp_processor_id before the cpu_init */
 	barrier();
 	/*
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 16bf6655aa85..f23f13403f33 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -5296,7 +5296,6 @@ static void fetch_possible_mmx_operand(struct x86_emulate_ctxt *ctxt,
 
 static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *))
 {
-	register void *__sp asm(_ASM_SP);
 	ulong flags = (ctxt->eflags & EFLAGS_MASK) | X86_EFLAGS_IF;
 
 	if (!(ctxt->d & ByteOp))
@@ -5304,7 +5303,7 @@ static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *))
 
 	asm("push %[flags]; popf; call *%[fastop]; pushf; pop %[flags]\n"
 	    : "+a"(ctxt->dst.val), "+d"(ctxt->src.val), [flags]"+D"(flags),
-	      [fastop]"+S"(fop), "+r"(__sp)
+	      [fastop]"+S"(fop), ASM_CALL_CONSTRAINT
 	    : "c"(ctxt->src2.val));
 
 	ctxt->eflags = (ctxt->eflags & ~EFLAGS_MASK) | (flags & EFLAGS_MASK);
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 06c0c6d0541e..6ee237f509dc 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -9036,7 +9036,6 @@ static void vmx_complete_atomic_exit(struct vcpu_vmx *vmx)
 static void vmx_handle_external_intr(struct kvm_vcpu *vcpu)
 {
 	u32 exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
-	register void *__sp asm(_ASM_SP);
 
 	if ((exit_intr_info & (INTR_INFO_VALID_MASK | INTR_INFO_INTR_TYPE_MASK))
 			== (INTR_INFO_VALID_MASK | INTR_TYPE_EXT_INTR)) {
@@ -9065,7 +9064,7 @@ static void vmx_handle_external_intr(struct kvm_vcpu *vcpu)
 #ifdef CONFIG_X86_64
 			[sp]"=&r"(tmp),
 #endif
-			"+r"(__sp)
+			ASM_CALL_CONSTRAINT
 			:
 			[entry]"r"(entry),
 			[ss]"i"(__KERNEL_DS),
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index b836a7274e12..39567b5c33da 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -806,7 +806,6 @@ no_context(struct pt_regs *regs, unsigned long error_code,
 	if (is_vmalloc_addr((void *)address) &&
 	    (((unsigned long)tsk->stack - 1 - address < PAGE_SIZE) ||
 	     address - ((unsigned long)tsk->stack + THREAD_SIZE) < PAGE_SIZE)) {
-		register void *__sp asm("rsp");
 		unsigned long stack = this_cpu_read(orig_ist.ist[DOUBLEFAULT_STACK]) - sizeof(void *);
 		/*
 		 * We're likely to be running with very little stack space
@@ -821,7 +820,7 @@ no_context(struct pt_regs *regs, unsigned long error_code,
 		asm volatile ("movq %[stack], %%rsp\n\t"
 			      "call handle_stack_overflow\n\t"
 			      "1: jmp 1b"
-			      : "+r" (__sp)
+			      : ASM_CALL_CONSTRAINT
 			      : "D" ("kernel stack overflow (page fault)"),
 				"S" (regs), "d" (address),
 				[stack] "rm" (stack));
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 1ab3821f9e26..93fe97cce581 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -126,8 +126,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
 	 * isn't free.
 	 */
 #ifdef CONFIG_DEBUG_VM
-	if (WARN_ON_ONCE(__read_cr3() !=
-			 (__sme_pa(real_prev->pgd) | prev_asid))) {
+	if (WARN_ON_ONCE(__read_cr3() != build_cr3(real_prev, prev_asid))) {
 		/*
 		 * If we were to BUG here, we'd be very likely to kill
 		 * the system so hard that we don't see the call trace.
@@ -172,7 +171,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
 			 */
 			this_cpu_write(cpu_tlbstate.ctxs[prev_asid].tlb_gen,
 				       next_tlb_gen);
-			write_cr3(__sme_pa(next->pgd) | prev_asid);
+			write_cr3(build_cr3(next, prev_asid));
 			trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH,
 					TLB_FLUSH_ALL);
 		}
@@ -216,12 +215,12 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
 		if (need_flush) {
 			this_cpu_write(cpu_tlbstate.ctxs[new_asid].ctx_id, next->context.ctx_id);
 			this_cpu_write(cpu_tlbstate.ctxs[new_asid].tlb_gen, next_tlb_gen);
-			write_cr3(__sme_pa(next->pgd) | new_asid);
+			write_cr3(build_cr3(next, new_asid));
 			trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH,
 					TLB_FLUSH_ALL);
 		} else {
 			/* The new ASID is already up to date. */
-			write_cr3(__sme_pa(next->pgd) | new_asid | CR3_NOFLUSH);
+			write_cr3(build_cr3_noflush(next, new_asid));
 			trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, 0);
 		}
 
@@ -265,7 +264,7 @@ void initialize_tlbstate_and_flush(void)
 		!(cr4_read_shadow() & X86_CR4_PCIDE));
 
 	/* Force ASID 0 and force a TLB flush. */
-	write_cr3(cr3 & ~CR3_PCID_MASK);
+	write_cr3(build_cr3(mm, 0));
 
 	/* Reinitialize tlbstate. */
 	this_cpu_write(cpu_tlbstate.loaded_mm_asid, 0);
diff --git a/tools/objtool/Documentation/stack-validation.txt b/tools/objtool/Documentation/stack-validation.txt
index 6a1af43862df..3995735a878f 100644
--- a/tools/objtool/Documentation/stack-validation.txt
+++ b/tools/objtool/Documentation/stack-validation.txt
@@ -194,10 +194,10 @@ they mean, and suggestions for how to fix them.
    If it's a GCC-compiled .c file, the error may be because the function
    uses an inline asm() statement which has a "call" instruction.  An
    asm() statement with a call instruction must declare the use of the
-   stack pointer in its output operand.  For example, on x86_64:
+   stack pointer in its output operand.  On x86_64, this means adding
+   the ASM_CALL_CONSTRAINT as an output constraint:
 
-     register void *__sp asm("rsp");
-     asm volatile("call func" : "+r" (__sp));
+     asm volatile("call func" : ASM_CALL_CONSTRAINT);
 
    Otherwise the stack frame may not get created before the call.
 
diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c
index 0e8c8ec4fd4e..0f22768c0d4d 100644
--- a/tools/objtool/arch/x86/decode.c
+++ b/tools/objtool/arch/x86/decode.c
@@ -208,14 +208,14 @@ int arch_decode_instruction(struct elf *elf, struct section *sec,
 		break;
 
 	case 0x89:
-		if (rex == 0x48 && modrm == 0xe5) {
+		if (rex_w && !rex_r && modrm_mod == 3 && modrm_reg == 4) {
 
-			/* mov %rsp, %rbp */
+			/* mov %rsp, reg */
 			*type = INSN_STACK;
 			op->src.type = OP_SRC_REG;
 			op->src.reg = CFI_SP;
 			op->dest.type = OP_DEST_REG;
-			op->dest.reg = CFI_BP;
+			op->dest.reg = op_to_cfi_reg[modrm_rm][rex_b];
 			break;
 		}
 
diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index f744617c9946..a0c518ecf085 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -1203,24 +1203,39 @@ static int update_insn_state(struct instruction *insn, struct insn_state *state)
 		switch (op->src.type) {
 
 		case OP_SRC_REG:
-			if (op->src.reg == CFI_SP && op->dest.reg == CFI_BP) {
+			if (op->src.reg == CFI_SP && op->dest.reg == CFI_BP &&
+			    cfa->base == CFI_SP &&
+			    regs[CFI_BP].base == CFI_CFA &&
+			    regs[CFI_BP].offset == -cfa->offset) {
+
+				/* mov %rsp, %rbp */
+				cfa->base = op->dest.reg;
+				state->bp_scratch = false;
+			}
 
-				if (cfa->base == CFI_SP &&
-				    regs[CFI_BP].base == CFI_CFA &&
-				    regs[CFI_BP].offset == -cfa->offset) {
+			else if (op->src.reg == CFI_SP &&
+				 op->dest.reg == CFI_BP && state->drap) {
 
-					/* mov %rsp, %rbp */
-					cfa->base = op->dest.reg;
-					state->bp_scratch = false;
-				}
+				/* drap: mov %rsp, %rbp */
+				regs[CFI_BP].base = CFI_BP;
+				regs[CFI_BP].offset = -state->stack_size;
+				state->bp_scratch = false;
+			}
 
-				else if (state->drap) {
+			else if (op->src.reg == CFI_SP && cfa->base == CFI_SP) {
 
-					/* drap: mov %rsp, %rbp */
-					regs[CFI_BP].base = CFI_BP;
-					regs[CFI_BP].offset = -state->stack_size;
-					state->bp_scratch = false;
-				}
+				/*
+				 * mov %rsp, %reg
+				 *
+				 * This is needed for the rare case where GCC
+				 * does:
+				 *
+				 *   mov    %rsp, %rax
+				 *   ...
+				 *   mov    %rax, %rsp
+				 */
+				state->vals[op->dest.reg].base = CFI_CFA;
+				state->vals[op->dest.reg].offset = -state->stack_size;
 			}
 
 			else if (op->dest.reg == cfa->base) {

^ permalink raw reply	[flat|nested] 539+ messages in thread

* [GIT PULL] x86 fixes
@ 2017-09-13 17:54 Ingo Molnar
  0 siblings, 0 replies; 539+ messages in thread
From: Ingo Molnar @ 2017-09-13 17:54 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Thomas Gleixner, H. Peter Anvin, Peter Zijlstra,
	Andy Lutomirski, Andrew Morton, Borislav Petkov

Linus,

Please pull the latest x86-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus

   # HEAD: 1278f58cdee63cfbb04e5624474a291c81a7a13b x86/hyper-v: Remove duplicated HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED definition

The main changes are the PCID fixes from Andy, but there's also two hyperv fixes 
and two paravirt updates.

  out-of-topic modifications in x86-urgent-for-linus:
  -----------------------------------------------------
  MAINTAINERS                        # 30c1bbffe629: paravirt: Switch maintainer

 Thanks,

	Ingo

------------------>
Andy Lutomirski (3):
      x86/mm: Get rid of VM_BUG_ON in switch_tlb_irqs_off()
      x86/hibernate/64: Mask off CR3's PCID bits in the saved CR3
      x86/mm/64: Initialize CR4.PCIDE early

Juergen Gross (2):
      x86/paravirt: Remove no longer used paravirt functions
      paravirt: Switch maintainer

K. Y. Srinivasan (1):
      x86/hyper-V: Allocate the IDT entry early in boot

Vitaly Kuznetsov (1):
      x86/hyper-v: Remove duplicated HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED definition


 MAINTAINERS                           |  4 +--
 arch/x86/include/asm/desc.h           |  3 +--
 arch/x86/include/asm/paravirt.h       | 37 --------------------------
 arch/x86/include/asm/paravirt_types.h |  9 -------
 arch/x86/include/asm/pgtable.h        | 27 +++----------------
 arch/x86/include/asm/special_insns.h  | 10 +++----
 arch/x86/include/uapi/asm/hyperv.h    |  6 -----
 arch/x86/kernel/cpu/common.c          | 49 +++++------------------------------
 arch/x86/kernel/cpu/mshyperv.c        |  4 +--
 arch/x86/kernel/paravirt.c            |  5 ----
 arch/x86/kernel/setup.c               |  5 +++-
 arch/x86/kernel/smpboot.c             |  8 +++---
 arch/x86/kvm/vmx.c                    |  2 +-
 arch/x86/mm/init.c                    | 34 ++++++++++++++++++++++++
 arch/x86/mm/pgtable.c                 |  7 +----
 arch/x86/mm/tlb.c                     | 22 +++++++++++++++-
 arch/x86/power/hibernate_64.c         | 21 ++++++++++++++-
 arch/x86/xen/enlighten_pv.c           |  2 --
 arch/x86/xen/mmu_pv.c                 |  2 --
 19 files changed, 107 insertions(+), 150 deletions(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index f46a3225e398..14e76a41b302 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -10129,7 +10129,7 @@ F:	include/uapi/linux/ppdev.h
 F:	Documentation/parport*.txt
 
 PARAVIRT_OPS INTERFACE
-M:	Jeremy Fitzhardinge <jeremy@goop.org>
+M:	Juergen Gross <jgross@suse.com>
 M:	Chris Wright <chrisw@sous-sol.org>
 M:	Alok Kataria <akataria@vmware.com>
 M:	Rusty Russell <rusty@rustcorp.com.au>
@@ -10137,7 +10137,7 @@ L:	virtualization@lists.linux-foundation.org
 S:	Supported
 F:	Documentation/virtual/paravirt_ops.txt
 F:	arch/*/kernel/paravirt*
-F:	arch/*/include/asm/paravirt.h
+F:	arch/*/include/asm/paravirt*.h
 F:	include/linux/hypervisor.h
 
 PARIDE DRIVERS FOR PARALLEL PORT IDE DEVICES
diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
index 1a2ba368da39..9d0e13738ed3 100644
--- a/arch/x86/include/asm/desc.h
+++ b/arch/x86/include/asm/desc.h
@@ -121,7 +121,6 @@ static inline int desc_empty(const void *ptr)
 #define load_ldt(ldt)				asm volatile("lldt %0"::"m" (ldt))
 
 #define store_gdt(dtr)				native_store_gdt(dtr)
-#define store_idt(dtr)				native_store_idt(dtr)
 #define store_tr(tr)				(tr = native_store_tr())
 
 #define load_TLS(t, cpu)			native_load_tls(t, cpu)
@@ -228,7 +227,7 @@ static inline void native_store_gdt(struct desc_ptr *dtr)
 	asm volatile("sgdt %0":"=m" (*dtr));
 }
 
-static inline void native_store_idt(struct desc_ptr *dtr)
+static inline void store_idt(struct desc_ptr *dtr)
 {
 	asm volatile("sidt %0":"=m" (*dtr));
 }
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index c25dd22f7c70..12deec722cf0 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -71,11 +71,6 @@ static inline void write_cr3(unsigned long x)
 	PVOP_VCALL1(pv_mmu_ops.write_cr3, x);
 }
 
-static inline unsigned long __read_cr4(void)
-{
-	return PVOP_CALL0(unsigned long, pv_cpu_ops.read_cr4);
-}
-
 static inline void __write_cr4(unsigned long x)
 {
 	PVOP_VCALL1(pv_cpu_ops.write_cr4, x);
@@ -228,10 +223,6 @@ static inline void set_ldt(const void *addr, unsigned entries)
 {
 	PVOP_VCALL2(pv_cpu_ops.set_ldt, addr, entries);
 }
-static inline void store_idt(struct desc_ptr *dtr)
-{
-	PVOP_VCALL1(pv_cpu_ops.store_idt, dtr);
-}
 static inline unsigned long paravirt_store_tr(void)
 {
 	return PVOP_CALL0(unsigned long, pv_cpu_ops.store_tr);
@@ -365,12 +356,6 @@ static inline void paravirt_release_p4d(unsigned long pfn)
 	PVOP_VCALL1(pv_mmu_ops.release_p4d, pfn);
 }
 
-static inline void pte_update(struct mm_struct *mm, unsigned long addr,
-			      pte_t *ptep)
-{
-	PVOP_VCALL3(pv_mmu_ops.pte_update, mm, addr, ptep);
-}
-
 static inline pte_t __pte(pteval_t val)
 {
 	pteval_t ret;
@@ -472,28 +457,6 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
 		PVOP_VCALL4(pv_mmu_ops.set_pte_at, mm, addr, ptep, pte.pte);
 }
 
-static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
-			      pmd_t *pmdp, pmd_t pmd)
-{
-	if (sizeof(pmdval_t) > sizeof(long))
-		/* 5 arg words */
-		pv_mmu_ops.set_pmd_at(mm, addr, pmdp, pmd);
-	else
-		PVOP_VCALL4(pv_mmu_ops.set_pmd_at, mm, addr, pmdp,
-			    native_pmd_val(pmd));
-}
-
-static inline void set_pud_at(struct mm_struct *mm, unsigned long addr,
-			      pud_t *pudp, pud_t pud)
-{
-	if (sizeof(pudval_t) > sizeof(long))
-		/* 5 arg words */
-		pv_mmu_ops.set_pud_at(mm, addr, pudp, pud);
-	else
-		PVOP_VCALL4(pv_mmu_ops.set_pud_at, mm, addr, pudp,
-			    native_pud_val(pud));
-}
-
 static inline void set_pmd(pmd_t *pmdp, pmd_t pmd)
 {
 	pmdval_t val = native_pmd_val(pmd);
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index 6b64fc6367f2..42873edd9f9d 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -107,7 +107,6 @@ struct pv_cpu_ops {
 	unsigned long (*read_cr0)(void);
 	void (*write_cr0)(unsigned long);
 
-	unsigned long (*read_cr4)(void);
 	void (*write_cr4)(unsigned long);
 
 #ifdef CONFIG_X86_64
@@ -119,8 +118,6 @@ struct pv_cpu_ops {
 	void (*load_tr_desc)(void);
 	void (*load_gdt)(const struct desc_ptr *);
 	void (*load_idt)(const struct desc_ptr *);
-	/* store_gdt has been removed. */
-	void (*store_idt)(struct desc_ptr *);
 	void (*set_ldt)(const void *desc, unsigned entries);
 	unsigned long (*store_tr)(void);
 	void (*load_tls)(struct thread_struct *t, unsigned int cpu);
@@ -245,12 +242,6 @@ struct pv_mmu_ops {
 	void (*set_pte_at)(struct mm_struct *mm, unsigned long addr,
 			   pte_t *ptep, pte_t pteval);
 	void (*set_pmd)(pmd_t *pmdp, pmd_t pmdval);
-	void (*set_pmd_at)(struct mm_struct *mm, unsigned long addr,
-			   pmd_t *pmdp, pmd_t pmdval);
-	void (*set_pud_at)(struct mm_struct *mm, unsigned long addr,
-			   pud_t *pudp, pud_t pudval);
-	void (*pte_update)(struct mm_struct *mm, unsigned long addr,
-			   pte_t *ptep);
 
 	pte_t (*ptep_modify_prot_start)(struct mm_struct *mm, unsigned long addr,
 					pte_t *ptep);
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 5b4c44d419c5..b714934512b3 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -55,8 +55,6 @@ extern pmdval_t early_pmd_flags;
 #else  /* !CONFIG_PARAVIRT */
 #define set_pte(ptep, pte)		native_set_pte(ptep, pte)
 #define set_pte_at(mm, addr, ptep, pte)	native_set_pte_at(mm, addr, ptep, pte)
-#define set_pmd_at(mm, addr, pmdp, pmd)	native_set_pmd_at(mm, addr, pmdp, pmd)
-#define set_pud_at(mm, addr, pudp, pud)	native_set_pud_at(mm, addr, pudp, pud)
 
 #define set_pte_atomic(ptep, pte)					\
 	native_set_pte_atomic(ptep, pte)
@@ -87,8 +85,6 @@ extern pmdval_t early_pmd_flags;
 #define pte_clear(mm, addr, ptep)	native_pte_clear(mm, addr, ptep)
 #define pmd_clear(pmd)			native_pmd_clear(pmd)
 
-#define pte_update(mm, addr, ptep)              do { } while (0)
-
 #define pgd_val(x)	native_pgd_val(x)
 #define __pgd(x)	native_make_pgd(x)
 
@@ -979,31 +975,18 @@ static inline void native_set_pte_at(struct mm_struct *mm, unsigned long addr,
 	native_set_pte(ptep, pte);
 }
 
-static inline void native_set_pmd_at(struct mm_struct *mm, unsigned long addr,
-				     pmd_t *pmdp , pmd_t pmd)
+static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
+			      pmd_t *pmdp, pmd_t pmd)
 {
 	native_set_pmd(pmdp, pmd);
 }
 
-static inline void native_set_pud_at(struct mm_struct *mm, unsigned long addr,
-				     pud_t *pudp, pud_t pud)
+static inline void set_pud_at(struct mm_struct *mm, unsigned long addr,
+			      pud_t *pudp, pud_t pud)
 {
 	native_set_pud(pudp, pud);
 }
 
-#ifndef CONFIG_PARAVIRT
-/*
- * Rules for using pte_update - it must be called after any PTE update which
- * has not been done using the set_pte / clear_pte interfaces.  It is used by
- * shadow mode hypervisors to resynchronize the shadow page tables.  Kernel PTE
- * updates should either be sets, clears, or set_pte_atomic for P->P
- * transitions, which means this hook should only be called for user PTEs.
- * This hook implies a P->P protection or access change has taken place, which
- * requires a subsequent TLB flush.
- */
-#define pte_update(mm, addr, ptep)		do { } while (0)
-#endif
-
 /*
  * We only update the dirty/accessed state if we set
  * the dirty bit by hand in the kernel, since the hardware
@@ -1031,7 +1014,6 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
 				       pte_t *ptep)
 {
 	pte_t pte = native_ptep_get_and_clear(ptep);
-	pte_update(mm, addr, ptep);
 	return pte;
 }
 
@@ -1058,7 +1040,6 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm,
 				      unsigned long addr, pte_t *ptep)
 {
 	clear_bit(_PAGE_BIT_RW, (unsigned long *)&ptep->pte);
-	pte_update(mm, addr, ptep);
 }
 
 #define flush_tlb_fix_spurious_fault(vma, address) do { } while (0)
diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h
index 9efaabf5b54b..a24dfcf79f4a 100644
--- a/arch/x86/include/asm/special_insns.h
+++ b/arch/x86/include/asm/special_insns.h
@@ -135,6 +135,11 @@ static inline void native_wbinvd(void)
 
 extern asmlinkage void native_load_gs_index(unsigned);
 
+static inline unsigned long __read_cr4(void)
+{
+	return native_read_cr4();
+}
+
 #ifdef CONFIG_PARAVIRT
 #include <asm/paravirt.h>
 #else
@@ -173,11 +178,6 @@ static inline void write_cr3(unsigned long x)
 	native_write_cr3(x);
 }
 
-static inline unsigned long __read_cr4(void)
-{
-	return native_read_cr4();
-}
-
 static inline void __write_cr4(unsigned long x)
 {
 	native_write_cr4(x);
diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h
index 7032f4d8dff3..f65d12504e80 100644
--- a/arch/x86/include/uapi/asm/hyperv.h
+++ b/arch/x86/include/uapi/asm/hyperv.h
@@ -153,12 +153,6 @@
 #define HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED	(1 << 11)
 
 /*
- * HV_VP_SET available
- */
-#define HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED	(1 << 11)
-
-
-/*
  * Crash notification flag.
  */
 #define HV_CRASH_CTL_CRASH_NOTIFY (1ULL << 63)
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index fb1d3358a4af..775f10100d7f 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -169,21 +169,21 @@ static int __init x86_mpx_setup(char *s)
 __setup("nompx", x86_mpx_setup);
 
 #ifdef CONFIG_X86_64
-static int __init x86_pcid_setup(char *s)
+static int __init x86_nopcid_setup(char *s)
 {
-	/* require an exact match without trailing characters */
-	if (strlen(s))
-		return 0;
+	/* nopcid doesn't accept parameters */
+	if (s)
+		return -EINVAL;
 
 	/* do not emit a message if the feature is not present */
 	if (!boot_cpu_has(X86_FEATURE_PCID))
-		return 1;
+		return 0;
 
 	setup_clear_cpu_cap(X86_FEATURE_PCID);
 	pr_info("nopcid: PCID feature disabled\n");
-	return 1;
+	return 0;
 }
-__setup("nopcid", x86_pcid_setup);
+early_param("nopcid", x86_nopcid_setup);
 #endif
 
 static int __init x86_noinvpcid_setup(char *s)
@@ -329,38 +329,6 @@ static __always_inline void setup_smap(struct cpuinfo_x86 *c)
 	}
 }
 
-static void setup_pcid(struct cpuinfo_x86 *c)
-{
-	if (cpu_has(c, X86_FEATURE_PCID)) {
-		if (cpu_has(c, X86_FEATURE_PGE)) {
-			/*
-			 * We'd like to use cr4_set_bits_and_update_boot(),
-			 * but we can't.  CR4.PCIDE is special and can only
-			 * be set in long mode, and the early CPU init code
-			 * doesn't know this and would try to restore CR4.PCIDE
-			 * prior to entering long mode.
-			 *
-			 * Instead, we rely on the fact that hotplug, resume,
-			 * etc all fully restore CR4 before they write anything
-			 * that could have nonzero PCID bits to CR3.  CR4.PCIDE
-			 * has no effect on the page tables themselves, so we
-			 * don't need it to be restored early.
-			 */
-			cr4_set_bits(X86_CR4_PCIDE);
-		} else {
-			/*
-			 * flush_tlb_all(), as currently implemented, won't
-			 * work if PCID is on but PGE is not.  Since that
-			 * combination doesn't exist on real hardware, there's
-			 * no reason to try to fully support it, but it's
-			 * polite to avoid corrupting data if we're on
-			 * an improperly configured VM.
-			 */
-			clear_cpu_cap(c, X86_FEATURE_PCID);
-		}
-	}
-}
-
 /*
  * Protection Keys are not available in 32-bit mode.
  */
@@ -1175,9 +1143,6 @@ static void identify_cpu(struct cpuinfo_x86 *c)
 	setup_smep(c);
 	setup_smap(c);
 
-	/* Set up PCID */
-	setup_pcid(c);
-
 	/*
 	 * The vendor-specific functions might have changed features.
 	 * Now we do "generic changes."
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index 3b3f713e15e5..236324e83a3a 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -59,8 +59,6 @@ void hyperv_vector_handler(struct pt_regs *regs)
 void hv_setup_vmbus_irq(void (*handler)(void))
 {
 	vmbus_handler = handler;
-	/* Setup the IDT for hypervisor callback */
-	alloc_intr_gate(HYPERVISOR_CALLBACK_VECTOR, hyperv_callback_vector);
 }
 
 void hv_remove_vmbus_irq(void)
@@ -251,6 +249,8 @@ static void __init ms_hyperv_init_platform(void)
 	 */
 	x86_platform.apic_post_init = hyperv_init;
 	hyperv_setup_mmu_ops();
+	/* Setup the IDT for hypervisor callback */
+	alloc_intr_gate(HYPERVISOR_CALLBACK_VECTOR, hyperv_callback_vector);
 #endif
 }
 
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index a14df9eecfed..19a3e8f961c7 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -327,7 +327,6 @@ __visible struct pv_cpu_ops pv_cpu_ops = {
 	.set_debugreg = native_set_debugreg,
 	.read_cr0 = native_read_cr0,
 	.write_cr0 = native_write_cr0,
-	.read_cr4 = native_read_cr4,
 	.write_cr4 = native_write_cr4,
 #ifdef CONFIG_X86_64
 	.read_cr8 = native_read_cr8,
@@ -343,7 +342,6 @@ __visible struct pv_cpu_ops pv_cpu_ops = {
 	.set_ldt = native_set_ldt,
 	.load_gdt = native_load_gdt,
 	.load_idt = native_load_idt,
-	.store_idt = native_store_idt,
 	.store_tr = native_store_tr,
 	.load_tls = native_load_tls,
 #ifdef CONFIG_X86_64
@@ -411,8 +409,6 @@ struct pv_mmu_ops pv_mmu_ops __ro_after_init = {
 	.set_pte = native_set_pte,
 	.set_pte_at = native_set_pte_at,
 	.set_pmd = native_set_pmd,
-	.set_pmd_at = native_set_pmd_at,
-	.pte_update = paravirt_nop,
 
 	.ptep_modify_prot_start = __ptep_modify_prot_start,
 	.ptep_modify_prot_commit = __ptep_modify_prot_commit,
@@ -424,7 +420,6 @@ struct pv_mmu_ops pv_mmu_ops __ro_after_init = {
 	.pmd_clear = native_pmd_clear,
 #endif
 	.set_pud = native_set_pud,
-	.set_pud_at = native_set_pud_at,
 
 	.pmd_val = PTE_IDENT,
 	.make_pmd = PTE_IDENT,
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index d84afb0a322d..0957dd73d127 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -1178,8 +1178,11 @@ void __init setup_arch(char **cmdline_p)
 	 * with the current CR4 value.  This may not be necessary, but
 	 * auditing all the early-boot CR4 manipulation would be needed to
 	 * rule it out.
+	 *
+	 * Mask off features that don't work outside long mode (just
+	 * PCIDE for now).
 	 */
-	mmu_cr4_features = __read_cr4();
+	mmu_cr4_features = __read_cr4() & ~X86_CR4_PCIDE;
 
 	memblock_set_current_limit(get_max_mapped());
 
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index cd6622c3204e..0854ff169274 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -226,10 +226,12 @@ static int enable_start_cpu0;
 static void notrace start_secondary(void *unused)
 {
 	/*
-	 * Don't put *anything* before cpu_init(), SMP booting is too
-	 * fragile that we want to limit the things done here to the
-	 * most necessary things.
+	 * Don't put *anything* except direct CPU state initialization
+	 * before cpu_init(), SMP booting is too fragile that we want to
+	 * limit the things done here to the most necessary things.
 	 */
+	if (boot_cpu_has(X86_FEATURE_PCID))
+		__write_cr4(__read_cr4() | X86_CR4_PCIDE);
 	cpu_init();
 	x86_cpuinit.early_percpu_clock_init();
 	preempt_disable();
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 4253adef9044..699704d4bc9e 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -5192,7 +5192,7 @@ static void vmx_set_constant_host_state(struct vcpu_vmx *vmx)
 	vmcs_write16(HOST_SS_SELECTOR, __KERNEL_DS);  /* 22.2.4 */
 	vmcs_write16(HOST_TR_SELECTOR, GDT_ENTRY_TSS*8);  /* 22.2.4 */
 
-	native_store_idt(&dt);
+	store_idt(&dt);
 	vmcs_writel(HOST_IDTR_BASE, dt.address);   /* 22.2.4 */
 	vmx->host_idt_base = dt.address;
 
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 7777ccc0e9f9..af5c1ed21d43 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -19,6 +19,7 @@
 #include <asm/microcode.h>
 #include <asm/kaslr.h>
 #include <asm/hypervisor.h>
+#include <asm/cpufeature.h>
 
 /*
  * We need to define the tracepoints somewhere, and tlb.c
@@ -193,6 +194,38 @@ static void __init probe_page_size_mask(void)
 	}
 }
 
+static void setup_pcid(void)
+{
+#ifdef CONFIG_X86_64
+	if (boot_cpu_has(X86_FEATURE_PCID)) {
+		if (boot_cpu_has(X86_FEATURE_PGE)) {
+			/*
+			 * This can't be cr4_set_bits_and_update_boot() --
+			 * the trampoline code can't handle CR4.PCIDE and
+			 * it wouldn't do any good anyway.  Despite the name,
+			 * cr4_set_bits_and_update_boot() doesn't actually
+			 * cause the bits in question to remain set all the
+			 * way through the secondary boot asm.
+			 *
+			 * Instead, we brute-force it and set CR4.PCIDE
+			 * manually in start_secondary().
+			 */
+			cr4_set_bits(X86_CR4_PCIDE);
+		} else {
+			/*
+			 * flush_tlb_all(), as currently implemented, won't
+			 * work if PCID is on but PGE is not.  Since that
+			 * combination doesn't exist on real hardware, there's
+			 * no reason to try to fully support it, but it's
+			 * polite to avoid corrupting data if we're on
+			 * an improperly configured VM.
+			 */
+			setup_clear_cpu_cap(X86_FEATURE_PCID);
+		}
+	}
+#endif
+}
+
 #ifdef CONFIG_X86_32
 #define NR_RANGE_MR 3
 #else /* CONFIG_X86_64 */
@@ -592,6 +625,7 @@ void __init init_mem_mapping(void)
 	unsigned long end;
 
 	probe_page_size_mask();
+	setup_pcid();
 
 #ifdef CONFIG_X86_64
 	end = max_pfn << PAGE_SHIFT;
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 218834a3e9ad..b372f3442bbf 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -426,10 +426,8 @@ int ptep_set_access_flags(struct vm_area_struct *vma,
 {
 	int changed = !pte_same(*ptep, entry);
 
-	if (changed && dirty) {
+	if (changed && dirty)
 		*ptep = entry;
-		pte_update(vma->vm_mm, address, ptep);
-	}
 
 	return changed;
 }
@@ -486,9 +484,6 @@ int ptep_test_and_clear_young(struct vm_area_struct *vma,
 		ret = test_and_clear_bit(_PAGE_BIT_ACCESSED,
 					 (unsigned long *) &ptep->pte);
 
-	if (ret)
-		pte_update(vma->vm_mm, addr, ptep);
-
 	return ret;
 }
 
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 37689a7cc03b..1ab3821f9e26 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -121,8 +121,28 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
 	 * hypothetical buggy code that directly switches to swapper_pg_dir
 	 * without going through leave_mm() / switch_mm_irqs_off() or that
 	 * does something like write_cr3(read_cr3_pa()).
+	 *
+	 * Only do this check if CONFIG_DEBUG_VM=y because __read_cr3()
+	 * isn't free.
 	 */
-	VM_BUG_ON(__read_cr3() != (__sme_pa(real_prev->pgd) | prev_asid));
+#ifdef CONFIG_DEBUG_VM
+	if (WARN_ON_ONCE(__read_cr3() !=
+			 (__sme_pa(real_prev->pgd) | prev_asid))) {
+		/*
+		 * If we were to BUG here, we'd be very likely to kill
+		 * the system so hard that we don't see the call trace.
+		 * Try to recover instead by ignoring the error and doing
+		 * a global flush to minimize the chance of corruption.
+		 *
+		 * (This is far from being a fully correct recovery.
+		 *  Architecturally, the CPU could prefetch something
+		 *  back into an incorrect ASID slot and leave it there
+		 *  to cause trouble down the road.  It's better than
+		 *  nothing, though.)
+		 */
+		__flush_tlb_all();
+	}
+#endif
 
 	if (real_prev == next) {
 		VM_BUG_ON(this_cpu_read(cpu_tlbstate.ctxs[prev_asid].ctx_id) !=
diff --git a/arch/x86/power/hibernate_64.c b/arch/x86/power/hibernate_64.c
index f2598d81cd55..f910c514438f 100644
--- a/arch/x86/power/hibernate_64.c
+++ b/arch/x86/power/hibernate_64.c
@@ -295,7 +295,26 @@ int arch_hibernation_header_save(void *addr, unsigned int max_size)
 		return -EOVERFLOW;
 	rdr->jump_address = (unsigned long)restore_registers;
 	rdr->jump_address_phys = __pa_symbol(restore_registers);
-	rdr->cr3 = restore_cr3;
+
+	/*
+	 * The restore code fixes up CR3 and CR4 in the following sequence:
+	 *
+	 * [in hibernation asm]
+	 * 1. CR3 <= temporary page tables
+	 * 2. CR4 <= mmu_cr4_features (from the kernel that restores us)
+	 * 3. CR3 <= rdr->cr3
+	 * 4. CR4 <= mmu_cr4_features (from us, i.e. the image kernel)
+	 * [in restore_processor_state()]
+	 * 5. CR4 <= saved CR4
+	 * 6. CR3 <= saved CR3
+	 *
+	 * Our mmu_cr4_features has CR4.PCIDE=0, and toggling
+	 * CR4.PCIDE while CR3's PCID bits are nonzero is illegal, so
+	 * rdr->cr3 needs to point to valid page tables but must not
+	 * have any of the PCID bits set.
+	 */
+	rdr->cr3 = restore_cr3 & ~CR3_PCID_MASK;
+
 	rdr->magic = RESTORE_MAGIC;
 
 	hibernation_e820_save(rdr->e820_digest);
diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index ae2a2e2d6362..69b9deff7e5c 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -1038,7 +1038,6 @@ static const struct pv_cpu_ops xen_cpu_ops __initconst = {
 	.read_cr0 = xen_read_cr0,
 	.write_cr0 = xen_write_cr0,
 
-	.read_cr4 = native_read_cr4,
 	.write_cr4 = xen_write_cr4,
 
 #ifdef CONFIG_X86_64
@@ -1073,7 +1072,6 @@ static const struct pv_cpu_ops xen_cpu_ops __initconst = {
 	.alloc_ldt = xen_alloc_ldt,
 	.free_ldt = xen_free_ldt,
 
-	.store_idt = native_store_idt,
 	.store_tr = xen_store_tr,
 
 	.write_ldt_entry = xen_write_ldt_entry,
diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c
index 6b983b300666..509f560bd0c6 100644
--- a/arch/x86/xen/mmu_pv.c
+++ b/arch/x86/xen/mmu_pv.c
@@ -2409,8 +2409,6 @@ static const struct pv_mmu_ops xen_mmu_ops __initconst = {
 	.flush_tlb_single = xen_flush_tlb_single,
 	.flush_tlb_others = xen_flush_tlb_others,
 
-	.pte_update = paravirt_nop,
-
 	.pgd_alloc = xen_pgd_alloc,
 	.pgd_free = xen_pgd_free,
 

^ permalink raw reply	[flat|nested] 539+ messages in thread

* [GIT PULL] x86 fixes
@ 2017-09-12 15:38 Ingo Molnar
  0 siblings, 0 replies; 539+ messages in thread
From: Ingo Molnar @ 2017-09-12 15:38 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Thomas Gleixner, H. Peter Anvin, Peter Zijlstra,
	Andrew Morton

Linus,

Please pull the latest x86-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus

   # HEAD: e2329b4252f373c244e75928be38bf1dd45b35da x86/cpu: Remove unused and undefined __generic_processor_info() declaration

Two fixes: dead code removal, plus a SME memory encryption fix on 32-bit kernels 
that crashed Xen guests.

  out-of-topic modifications in x86-urgent-for-linus:
  -----------------------------------------------------
  include/linux/mem_encrypt.h        # 21d9bb4a05ba: x86/mm: Make the SME mask a 

 Thanks,

	Ingo

------------------>
Borislav Petkov (1):
      x86/mm: Make the SME mask a u64

Dou Liyang (1):
      x86/cpu: Remove unused and undefined __generic_processor_info() declaration


 arch/x86/include/asm/mem_encrypt.h |  4 ++--
 arch/x86/include/asm/mpspec.h      |  1 -
 arch/x86/kernel/apic/apic.c        |  2 +-
 arch/x86/mm/mem_encrypt.c          |  2 +-
 include/linux/mem_encrypt.h        | 13 +++++++++----
 5 files changed, 13 insertions(+), 9 deletions(-)

diff --git a/arch/x86/include/asm/mem_encrypt.h b/arch/x86/include/asm/mem_encrypt.h
index 8e618fcf1f7c..6a77c63540f7 100644
--- a/arch/x86/include/asm/mem_encrypt.h
+++ b/arch/x86/include/asm/mem_encrypt.h
@@ -21,7 +21,7 @@
 
 #ifdef CONFIG_AMD_MEM_ENCRYPT
 
-extern unsigned long sme_me_mask;
+extern u64 sme_me_mask;
 
 void sme_encrypt_execute(unsigned long encrypted_kernel_vaddr,
 			 unsigned long decrypted_kernel_vaddr,
@@ -49,7 +49,7 @@ void swiotlb_set_mem_attributes(void *vaddr, unsigned long size);
 
 #else	/* !CONFIG_AMD_MEM_ENCRYPT */
 
-#define sme_me_mask	0UL
+#define sme_me_mask	0ULL
 
 static inline void __init sme_early_encrypt(resource_size_t paddr,
 					    unsigned long size) { }
diff --git a/arch/x86/include/asm/mpspec.h b/arch/x86/include/asm/mpspec.h
index 831eb7895535..c471ca1f9412 100644
--- a/arch/x86/include/asm/mpspec.h
+++ b/arch/x86/include/asm/mpspec.h
@@ -86,7 +86,6 @@ static inline void e820__memblock_alloc_reserved_mpc_new(void) { }
 #endif
 
 int generic_processor_info(int apicid, int version);
-int __generic_processor_info(int apicid, int version, bool enabled);
 
 #define PHYSID_ARRAY_SIZE	BITS_TO_LONGS(MAX_LOCAL_APIC)
 
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 7834f73efbf1..6e19ef152869 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -2130,7 +2130,7 @@ int generic_processor_info(int apicid, int version)
 	 * Since fixing handling of boot_cpu_physical_apicid requires
 	 * another discussion and tests on each platform, we leave it
 	 * for now and here we use read_apic_id() directly in this
-	 * function, __generic_processor_info().
+	 * function, generic_processor_info().
 	 */
 	if (disabled_cpu_apicid != BAD_APICID &&
 	    disabled_cpu_apicid != read_apic_id() &&
diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c
index 0fbd09269757..3fcc8e01683b 100644
--- a/arch/x86/mm/mem_encrypt.c
+++ b/arch/x86/mm/mem_encrypt.c
@@ -37,7 +37,7 @@ static char sme_cmdline_off[] __initdata = "off";
  * reside in the .data section so as not to be zeroed out when the .bss
  * section is later cleared.
  */
-unsigned long sme_me_mask __section(.data) = 0;
+u64 sme_me_mask __section(.data) = 0;
 EXPORT_SYMBOL_GPL(sme_me_mask);
 
 /* Buffer used for early in-place encryption by BSP, no locking needed */
diff --git a/include/linux/mem_encrypt.h b/include/linux/mem_encrypt.h
index 1255f09f5e42..265a9cd21cb4 100644
--- a/include/linux/mem_encrypt.h
+++ b/include/linux/mem_encrypt.h
@@ -21,7 +21,7 @@
 
 #else	/* !CONFIG_ARCH_HAS_MEM_ENCRYPT */
 
-#define sme_me_mask	0UL
+#define sme_me_mask	0ULL
 
 #endif	/* CONFIG_ARCH_HAS_MEM_ENCRYPT */
 
@@ -30,18 +30,23 @@ static inline bool sme_active(void)
 	return !!sme_me_mask;
 }
 
-static inline unsigned long sme_get_me_mask(void)
+static inline u64 sme_get_me_mask(void)
 {
 	return sme_me_mask;
 }
 
+#ifdef CONFIG_AMD_MEM_ENCRYPT
 /*
  * The __sme_set() and __sme_clr() macros are useful for adding or removing
  * the encryption mask from a value (e.g. when dealing with pagetable
  * entries).
  */
-#define __sme_set(x)		((unsigned long)(x) | sme_me_mask)
-#define __sme_clr(x)		((unsigned long)(x) & ~sme_me_mask)
+#define __sme_set(x)		((x) | sme_me_mask)
+#define __sme_clr(x)		((x) & ~sme_me_mask)
+#else
+#define __sme_set(x)		(x)
+#define __sme_clr(x)		(x)
+#endif
 
 #endif	/* __ASSEMBLY__ */
 

^ permalink raw reply	[flat|nested] 539+ messages in thread

* [GIT PULL] x86 fixes
@ 2017-08-26  7:26 Ingo Molnar
  0 siblings, 0 replies; 539+ messages in thread
From: Ingo Molnar @ 2017-08-26  7:26 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Thomas Gleixner, H. Peter Anvin, Peter Zijlstra,
	Andrew Morton

Linus,

Please pull the latest x86-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus

   # HEAD: ccd5b3235180eef3cfec337df1c8554ab151b5cc x86/mm: Fix use-after-free of ldt_struct

Two fixes: one for an ldt_struct handling bug and a cherry-picked objtool fix.

 Thanks,

	Ingo

------------------>
Eric Biggers (1):
      x86/mm: Fix use-after-free of ldt_struct

Josh Poimboeuf (1):
      objtool: Fix '-mtune=atom' decoding support in objtool 2.0


 arch/x86/include/asm/mmu_context.h |  4 +---
 tools/objtool/arch/x86/decode.c    | 26 +++++++++++++++++++++++++-
 2 files changed, 26 insertions(+), 4 deletions(-)

diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index 265c907d7d4c..7a234be7e298 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -140,9 +140,7 @@ static inline int init_new_context(struct task_struct *tsk,
 		mm->context.execute_only_pkey = -1;
 	}
 	#endif
-	init_new_context_ldt(tsk, mm);
-
-	return 0;
+	return init_new_context_ldt(tsk, mm);
 }
 static inline void destroy_context(struct mm_struct *mm)
 {
diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c
index a36c2eba64e7..4559a21a8de2 100644
--- a/tools/objtool/arch/x86/decode.c
+++ b/tools/objtool/arch/x86/decode.c
@@ -271,7 +271,7 @@ int arch_decode_instruction(struct elf *elf, struct section *sec,
 	case 0x8d:
 		if (rex == 0x48 && modrm == 0x65) {
 
-			/* lea -disp(%rbp), %rsp */
+			/* lea disp(%rbp), %rsp */
 			*type = INSN_STACK;
 			op->src.type = OP_SRC_ADD;
 			op->src.reg = CFI_BP;
@@ -281,6 +281,30 @@ int arch_decode_instruction(struct elf *elf, struct section *sec,
 			break;
 		}
 
+		if (rex == 0x48 && (modrm == 0xa4 || modrm == 0x64) &&
+		    sib == 0x24) {
+
+			/* lea disp(%rsp), %rsp */
+			*type = INSN_STACK;
+			op->src.type = OP_SRC_ADD;
+			op->src.reg = CFI_SP;
+			op->src.offset = insn.displacement.value;
+			op->dest.type = OP_DEST_REG;
+			op->dest.reg = CFI_SP;
+			break;
+		}
+
+		if (rex == 0x48 && modrm == 0x2c && sib == 0x24) {
+
+			/* lea (%rsp), %rbp */
+			*type = INSN_STACK;
+			op->src.type = OP_SRC_REG;
+			op->src.reg = CFI_SP;
+			op->dest.type = OP_DEST_REG;
+			op->dest.reg = CFI_BP;
+			break;
+		}
+
 		if (rex == 0x4c && modrm == 0x54 && sib == 0x24 &&
 		    insn.displacement.value == 8) {
 

^ permalink raw reply	[flat|nested] 539+ messages in thread

* [GIT PULL] x86 fixes
@ 2017-07-21 10:26 Ingo Molnar
  0 siblings, 0 replies; 539+ messages in thread
From: Ingo Molnar @ 2017-07-21 10:26 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Thomas Gleixner, H. Peter Anvin, Peter Zijlstra,
	Andrew Morton

Linus,

Please pull the latest x86-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus

   # HEAD: db15e7f27369b81b6605a546d54eb844f87370a5 x86/devicetree: Convert to using %pOF instead of ->full_name

Half of the fixes are for various build time warnings triggered by randconfig 
builds. Most (but not all...) were harmless. There's also:

 - ACPI boundary condition fixes,
 - UV platform fixes,
 - defconfig updates,
 - an AMD K6 CPU init fix,
 - a %pOF printk format related preparatory change,
 - and warning fix related to the tlb/PCID changes.

 Thanks,

	Ingo

------------------>
Andrew Banman (1):
      x86/platform/uv/BAU: Disable BAU on single hub configurations

Arnd Bergmann (7):
      perf/x86: Shut up false-positive -Wmaybe-uninitialized warning
      x86/fpu/math-emu: Fix possible uninitialized variable use
      x86/fpu/math-emu: Avoid bogus -Wint-in-bool-context warning
      x86/io: Add "memory" clobber to insb/insw/insl/outsb/outsw/outsl
      x86/build: Silence the build with "make -s"
      x86/platform: Add PCI dependency for PUNIT_ATOM_DEBUG
      x86/platform/intel-mid: Fix a format string overflow warning

Justin Ernst (1):
      x86/platform/uv/BAU: Fix congested_response_us not taking effect

Krzysztof Kozlowski (1):
      x86/defconfig: Remove stale, old Kconfig options

Mikulas Patocka (1):
      x86/cpu: Use indirect call to measure performance in init_amd_k6()

Rob Herring (1):
      x86/devicetree: Convert to using %pOF instead of ->full_name

Roman Kagan (1):
      x86/mm, KVM: Fix warning when !CONFIG_PREEMPT_COUNT

Seunghun Han (2):
      x86/acpi: Prevent out of bound access caused by broken ACPI tables
      x86/ioapic: Pass the correct data to unmask_ioapic_irq()


 arch/x86/Kconfig.debug                             |  1 +
 arch/x86/boot/Makefile                             |  5 ++--
 arch/x86/configs/i386_defconfig                    |  3 ---
 arch/x86/configs/x86_64_defconfig                  |  3 ---
 arch/x86/events/core.c                             |  4 +--
 arch/x86/include/asm/io.h                          |  4 +--
 arch/x86/include/asm/mmu_context.h                 |  2 +-
 arch/x86/kernel/acpi/boot.c                        |  8 ++++++
 arch/x86/kernel/apic/io_apic.c                     |  2 +-
 arch/x86/kernel/cpu/amd.c                          |  1 +
 arch/x86/kernel/devicetree.c                       |  3 +--
 arch/x86/math-emu/Makefile                         |  4 +--
 arch/x86/math-emu/fpu_emu.h                        |  2 +-
 arch/x86/math-emu/reg_compare.c                    | 16 ++++++------
 .../intel-mid/device_libs/platform_max7315.c       |  6 +++--
 arch/x86/platform/uv/tlb_uv.c                      | 29 +++++++++++++++-------
 16 files changed, 55 insertions(+), 38 deletions(-)

diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index fcb7604172ce..cd20ca0b4043 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -348,6 +348,7 @@ config X86_DEBUG_FPU
 
 config PUNIT_ATOM_DEBUG
 	tristate "ATOM Punit debug driver"
+	depends on PCI
 	select DEBUG_FS
 	select IOSF_MBI
 	---help---
diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile
index 0d810fb15eac..d88a2fddba8c 100644
--- a/arch/x86/boot/Makefile
+++ b/arch/x86/boot/Makefile
@@ -73,12 +73,13 @@ UBSAN_SANITIZE := n
 $(obj)/bzImage: asflags-y  := $(SVGA_MODE)
 
 quiet_cmd_image = BUILD   $@
+silent_redirect_image = >/dev/null
 cmd_image = $(obj)/tools/build $(obj)/setup.bin $(obj)/vmlinux.bin \
-			       $(obj)/zoffset.h $@
+			       $(obj)/zoffset.h $@ $($(quiet)redirect_image)
 
 $(obj)/bzImage: $(obj)/setup.bin $(obj)/vmlinux.bin $(obj)/tools/build FORCE
 	$(call if_changed,image)
-	@echo 'Kernel: $@ is ready' ' (#'`cat .version`')'
+	@$(kecho) 'Kernel: $@ is ready' ' (#'`cat .version`')'
 
 OBJCOPYFLAGS_vmlinux.bin := -O binary -R .note -R .comment -S
 $(obj)/vmlinux.bin: $(obj)/compressed/vmlinux FORCE
diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig
index 6cf79e1a6830..0eb9f92f3717 100644
--- a/arch/x86/configs/i386_defconfig
+++ b/arch/x86/configs/i386_defconfig
@@ -1,5 +1,4 @@
 # CONFIG_64BIT is not set
-CONFIG_EXPERIMENTAL=y
 # CONFIG_LOCALVERSION_AUTO is not set
 CONFIG_SYSVIPC=y
 CONFIG_POSIX_MQUEUE=y
@@ -125,7 +124,6 @@ CONFIG_NF_CONNTRACK_IPV4=y
 CONFIG_IP_NF_IPTABLES=y
 CONFIG_IP_NF_FILTER=y
 CONFIG_IP_NF_TARGET_REJECT=y
-CONFIG_IP_NF_TARGET_ULOG=y
 CONFIG_NF_NAT=y
 CONFIG_IP_NF_TARGET_MASQUERADE=y
 CONFIG_IP_NF_MANGLE=y
@@ -255,7 +253,6 @@ CONFIG_USB_OHCI_HCD=y
 CONFIG_USB_UHCI_HCD=y
 CONFIG_USB_PRINTER=y
 CONFIG_USB_STORAGE=y
-CONFIG_USB_LIBUSUAL=y
 CONFIG_EDAC=y
 CONFIG_RTC_CLASS=y
 # CONFIG_RTC_HCTOSYS is not set
diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig
index de45f57b410d..4a4b16e56d35 100644
--- a/arch/x86/configs/x86_64_defconfig
+++ b/arch/x86/configs/x86_64_defconfig
@@ -1,4 +1,3 @@
-CONFIG_EXPERIMENTAL=y
 # CONFIG_LOCALVERSION_AUTO is not set
 CONFIG_SYSVIPC=y
 CONFIG_POSIX_MQUEUE=y
@@ -124,7 +123,6 @@ CONFIG_NF_CONNTRACK_IPV4=y
 CONFIG_IP_NF_IPTABLES=y
 CONFIG_IP_NF_FILTER=y
 CONFIG_IP_NF_TARGET_REJECT=y
-CONFIG_IP_NF_TARGET_ULOG=y
 CONFIG_NF_NAT=y
 CONFIG_IP_NF_TARGET_MASQUERADE=y
 CONFIG_IP_NF_MANGLE=y
@@ -251,7 +249,6 @@ CONFIG_USB_OHCI_HCD=y
 CONFIG_USB_UHCI_HCD=y
 CONFIG_USB_PRINTER=y
 CONFIG_USB_STORAGE=y
-CONFIG_USB_LIBUSUAL=y
 CONFIG_EDAC=y
 CONFIG_RTC_CLASS=y
 # CONFIG_RTC_HCTOSYS is not set
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index ff1ea2fb9705..8e3db8f642a7 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -191,8 +191,8 @@ static void release_pmc_hardware(void) {}
 
 static bool check_hw_exists(void)
 {
-	u64 val, val_fail, val_new= ~0;
-	int i, reg, reg_fail, ret = 0;
+	u64 val, val_fail = -1, val_new= ~0;
+	int i, reg, reg_fail = -1, ret = 0;
 	int bios_fail = 0;
 	int reg_safe = -1;
 
diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h
index 7afb0e2f07f4..48febf07e828 100644
--- a/arch/x86/include/asm/io.h
+++ b/arch/x86/include/asm/io.h
@@ -328,13 +328,13 @@ static inline unsigned type in##bwl##_p(int port)			\
 static inline void outs##bwl(int port, const void *addr, unsigned long count) \
 {									\
 	asm volatile("rep; outs" #bwl					\
-		     : "+S"(addr), "+c"(count) : "d"(port));		\
+		     : "+S"(addr), "+c"(count) : "d"(port) : "memory");	\
 }									\
 									\
 static inline void ins##bwl(int port, void *addr, unsigned long count)	\
 {									\
 	asm volatile("rep; ins" #bwl					\
-		     : "+D"(addr), "+c"(count) : "d"(port));		\
+		     : "+D"(addr), "+c"(count) : "d"(port) : "memory");	\
 }
 
 BUILDIO(b, b, char)
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index ecfcb6643c9b..265c907d7d4c 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -293,7 +293,7 @@ static inline unsigned long __get_current_cr3_fast(void)
 	unsigned long cr3 = __pa(this_cpu_read(cpu_tlbstate.loaded_mm)->pgd);
 
 	/* For now, be very restrictive about when this can be called. */
-	VM_WARN_ON(in_nmi() || !in_atomic());
+	VM_WARN_ON(in_nmi() || preemptible());
 
 	VM_BUG_ON(cr3 != __read_cr3());
 	return cr3;
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 6bb680671088..7491e73d9253 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -347,6 +347,14 @@ static void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger,
 	struct mpc_intsrc mp_irq;
 
 	/*
+	 * Check bus_irq boundary.
+	 */
+	if (bus_irq >= NR_IRQS_LEGACY) {
+		pr_warn("Invalid bus_irq %u for legacy override\n", bus_irq);
+		return;
+	}
+
+	/*
 	 * Convert 'gsi' to 'ioapic.pin'.
 	 */
 	ioapic = mp_find_ioapic(gsi);
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index b4f5f73febdb..237e9c2341c7 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -2093,7 +2093,7 @@ static inline void __init check_timer(void)
 			int idx;
 			idx = find_irq_entry(apic1, pin1, mp_INT);
 			if (idx != -1 && irq_trigger(idx))
-				unmask_ioapic_irq(irq_get_chip_data(0));
+				unmask_ioapic_irq(irq_get_irq_data(0));
 		}
 		irq_domain_deactivate_irq(irq_data);
 		irq_domain_activate_irq(irq_data);
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index bb5abe8f5fd4..3b9e220621f8 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -134,6 +134,7 @@ static void init_amd_k6(struct cpuinfo_x86 *c)
 
 		n = K6_BUG_LOOP;
 		f_vide = vide;
+		OPTIMIZER_HIDE_VAR(f_vide);
 		d = rdtsc();
 		while (n--)
 			f_vide();
diff --git a/arch/x86/kernel/devicetree.c b/arch/x86/kernel/devicetree.c
index 3fe45f84ced4..cbf1f6ba39a8 100644
--- a/arch/x86/kernel/devicetree.c
+++ b/arch/x86/kernel/devicetree.c
@@ -235,8 +235,7 @@ static void __init dtb_add_ioapic(struct device_node *dn)
 
 	ret = of_address_to_resource(dn, 0, &r);
 	if (ret) {
-		printk(KERN_ERR "Can't obtain address from node %s.\n",
-				dn->full_name);
+		printk(KERN_ERR "Can't obtain address from device node %pOF.\n", dn);
 		return;
 	}
 	mp_register_ioapic(++ioapic_id, r.start, gsi_top, &cfg);
diff --git a/arch/x86/math-emu/Makefile b/arch/x86/math-emu/Makefile
index 9b0c63b60302..1b2dac174321 100644
--- a/arch/x86/math-emu/Makefile
+++ b/arch/x86/math-emu/Makefile
@@ -5,8 +5,8 @@
 #DEBUG	= -DDEBUGGING
 DEBUG	=
 PARANOID = -DPARANOID
-EXTRA_CFLAGS	:= $(PARANOID) $(DEBUG) -fno-builtin $(MATH_EMULATION)
-EXTRA_AFLAGS	:= $(PARANOID)
+ccflags-y += $(PARANOID) $(DEBUG) -fno-builtin $(MATH_EMULATION)
+asflags-y += $(PARANOID)
 
 # From 'C' language sources:
 C_OBJS =fpu_entry.o errors.o \
diff --git a/arch/x86/math-emu/fpu_emu.h b/arch/x86/math-emu/fpu_emu.h
index afbc4d805d66..c9c320dccca1 100644
--- a/arch/x86/math-emu/fpu_emu.h
+++ b/arch/x86/math-emu/fpu_emu.h
@@ -157,7 +157,7 @@ extern u_char const data_sizes_16[32];
 
 #define signbyte(a) (((u_char *)(a))[9])
 #define getsign(a) (signbyte(a) & 0x80)
-#define setsign(a,b) { if (b) signbyte(a) |= 0x80; else signbyte(a) &= 0x7f; }
+#define setsign(a,b) { if ((b) != 0) signbyte(a) |= 0x80; else signbyte(a) &= 0x7f; }
 #define copysign(a,b) { if (getsign(a)) signbyte(b) |= 0x80; \
                         else signbyte(b) &= 0x7f; }
 #define changesign(a) { signbyte(a) ^= 0x80; }
diff --git a/arch/x86/math-emu/reg_compare.c b/arch/x86/math-emu/reg_compare.c
index b77360fdbf4a..19b33b50adfa 100644
--- a/arch/x86/math-emu/reg_compare.c
+++ b/arch/x86/math-emu/reg_compare.c
@@ -168,7 +168,7 @@ static int compare(FPU_REG const *b, int tagb)
 /* This function requires that st(0) is not empty */
 int FPU_compare_st_data(FPU_REG const *loaded_data, u_char loaded_tag)
 {
-	int f = 0, c;
+	int f, c;
 
 	c = compare(loaded_data, loaded_tag);
 
@@ -189,12 +189,12 @@ int FPU_compare_st_data(FPU_REG const *loaded_data, u_char loaded_tag)
 		case COMP_No_Comp:
 			f = SW_C3 | SW_C2 | SW_C0;
 			break;
-#ifdef PARANOID
 		default:
+#ifdef PARANOID
 			EXCEPTION(EX_INTERNAL | 0x121);
+#endif /* PARANOID */
 			f = SW_C3 | SW_C2 | SW_C0;
 			break;
-#endif /* PARANOID */
 		}
 	setcc(f);
 	if (c & COMP_Denormal) {
@@ -205,7 +205,7 @@ int FPU_compare_st_data(FPU_REG const *loaded_data, u_char loaded_tag)
 
 static int compare_st_st(int nr)
 {
-	int f = 0, c;
+	int f, c;
 	FPU_REG *st_ptr;
 
 	if (!NOT_EMPTY(0) || !NOT_EMPTY(nr)) {
@@ -235,12 +235,12 @@ static int compare_st_st(int nr)
 		case COMP_No_Comp:
 			f = SW_C3 | SW_C2 | SW_C0;
 			break;
-#ifdef PARANOID
 		default:
+#ifdef PARANOID
 			EXCEPTION(EX_INTERNAL | 0x122);
+#endif /* PARANOID */
 			f = SW_C3 | SW_C2 | SW_C0;
 			break;
-#endif /* PARANOID */
 		}
 	setcc(f);
 	if (c & COMP_Denormal) {
@@ -283,12 +283,12 @@ static int compare_i_st_st(int nr)
 	case COMP_No_Comp:
 		f = X86_EFLAGS_ZF | X86_EFLAGS_PF | X86_EFLAGS_CF;
 		break;
-#ifdef PARANOID
 	default:
+#ifdef PARANOID
 		EXCEPTION(EX_INTERNAL | 0x122);
+#endif /* PARANOID */
 		f = 0;
 		break;
-#endif /* PARANOID */
 	}
 	FPU_EFLAGS = (FPU_EFLAGS & ~(X86_EFLAGS_ZF | X86_EFLAGS_PF | X86_EFLAGS_CF)) | f;
 	if (c & COMP_Denormal) {
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_max7315.c b/arch/x86/platform/intel-mid/device_libs/platform_max7315.c
index 6e075afa7877..58337b2bc682 100644
--- a/arch/x86/platform/intel-mid/device_libs/platform_max7315.c
+++ b/arch/x86/platform/intel-mid/device_libs/platform_max7315.c
@@ -38,8 +38,10 @@ static void __init *max7315_platform_data(void *info)
 	 */
 	strcpy(i2c_info->type, "max7315");
 	if (nr++) {
-		sprintf(base_pin_name, "max7315_%d_base", nr);
-		sprintf(intr_pin_name, "max7315_%d_int", nr);
+		snprintf(base_pin_name, sizeof(base_pin_name),
+			 "max7315_%d_base", nr);
+		snprintf(intr_pin_name, sizeof(intr_pin_name),
+			 "max7315_%d_int", nr);
 	} else {
 		strcpy(base_pin_name, "max7315_base");
 		strcpy(intr_pin_name, "max7315_int");
diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c
index d4a61ddf9e62..3e4bdb442fbc 100644
--- a/arch/x86/platform/uv/tlb_uv.c
+++ b/arch/x86/platform/uv/tlb_uv.c
@@ -40,7 +40,6 @@ static int timeout_base_ns[] = {
 static int timeout_us;
 static bool nobau = true;
 static int nobau_perm;
-static cycles_t congested_cycles;
 
 /* tunables: */
 static int max_concurr		= MAX_BAU_CONCURRENT;
@@ -829,10 +828,10 @@ static void record_send_stats(cycles_t time1, cycles_t time2,
 		if ((completion_status == FLUSH_COMPLETE) && (try == 1)) {
 			bcp->period_requests++;
 			bcp->period_time += elapsed;
-			if ((elapsed > congested_cycles) &&
+			if ((elapsed > usec_2_cycles(bcp->cong_response_us)) &&
 			    (bcp->period_requests > bcp->cong_reps) &&
 			    ((bcp->period_time / bcp->period_requests) >
-							congested_cycles)) {
+					usec_2_cycles(bcp->cong_response_us))) {
 				stat->s_congested++;
 				disable_for_period(bcp, stat);
 			}
@@ -2222,14 +2221,17 @@ static int __init uv_bau_init(void)
 	else if (is_uv1_hub())
 		ops = uv1_bau_ops;
 
+	nuvhubs = uv_num_possible_blades();
+	if (nuvhubs < 2) {
+		pr_crit("UV: BAU disabled - insufficient hub count\n");
+		goto err_bau_disable;
+	}
+
 	for_each_possible_cpu(cur_cpu) {
 		mask = &per_cpu(uv_flush_tlb_mask, cur_cpu);
 		zalloc_cpumask_var_node(mask, GFP_KERNEL, cpu_to_node(cur_cpu));
 	}
 
-	nuvhubs = uv_num_possible_blades();
-	congested_cycles = usec_2_cycles(congested_respns_us);
-
 	uv_base_pnode = 0x7fffffff;
 	for (uvhub = 0; uvhub < nuvhubs; uvhub++) {
 		cpus = uv_blade_nr_possible_cpus(uvhub);
@@ -2242,9 +2244,8 @@ static int __init uv_bau_init(void)
 		enable_timeouts();
 
 	if (init_per_cpu(nuvhubs, uv_base_pnode)) {
-		set_bau_off();
-		nobau_perm = 1;
-		return 0;
+		pr_crit("UV: BAU disabled - per CPU init failed\n");
+		goto err_bau_disable;
 	}
 
 	vector = UV_BAU_MESSAGE;
@@ -2270,6 +2271,16 @@ static int __init uv_bau_init(void)
 	}
 
 	return 0;
+
+err_bau_disable:
+
+	for_each_possible_cpu(cur_cpu)
+		free_cpumask_var(per_cpu(uv_flush_tlb_mask, cur_cpu));
+
+	set_bau_off();
+	nobau_perm = 1;
+
+	return -EINVAL;
 }
 core_initcall(uv_bau_init);
 fs_initcall(uv_ptc_init);

^ permalink raw reply	[flat|nested] 539+ messages in thread

* [GIT PULL] x86 fixes
@ 2017-06-10  9:03 Ingo Molnar
  0 siblings, 0 replies; 539+ messages in thread
From: Ingo Molnar @ 2017-06-10  9:03 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Thomas Gleixner, H. Peter Anvin, Peter Zijlstra,
	Andrew Morton

Linus,

Please pull the latest x86-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus

   # HEAD: 5b0bc9ac2ce4881ee318a21f31140584ce4dbdad x86/microcode/intel: Clear patch pointer before jettisoning the initrd

Misc fixes: a Geode fix plus a microcode loader fix.

 Thanks,

	Ingo

------------------>
Christian Sünkenberg (1):
      x86/cpu/cyrix: Add alternative Device ID of Geode GX1 SoC

Dominik Brodowski (1):
      x86/microcode/intel: Clear patch pointer before jettisoning the initrd


 arch/x86/kernel/cpu/cyrix.c           | 1 +
 arch/x86/kernel/cpu/microcode/intel.c | 3 +++
 2 files changed, 4 insertions(+)

diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c
index a70fd61095f8..6f077445647a 100644
--- a/arch/x86/kernel/cpu/cyrix.c
+++ b/arch/x86/kernel/cpu/cyrix.c
@@ -255,6 +255,7 @@ static void init_cyrix(struct cpuinfo_x86 *c)
 		break;
 
 	case 4: /* MediaGX/GXm or Geode GXM/GXLV/GX1 */
+	case 11: /* GX1 with inverted Device ID */
 #ifdef CONFIG_PCI
 	{
 		u32 vendor, device;
diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c
index afdfd237b59f..f522415bf9e5 100644
--- a/arch/x86/kernel/cpu/microcode/intel.c
+++ b/arch/x86/kernel/cpu/microcode/intel.c
@@ -619,6 +619,9 @@ int __init save_microcode_in_initrd_intel(void)
 
 	show_saved_mc();
 
+	/* initrd is going away, clear patch ptr. */
+	intel_ucode_patch = NULL;
+
 	return 0;
 }
 

^ permalink raw reply	[flat|nested] 539+ messages in thread

* [GIT PULL] x86 fixes
@ 2017-06-02  6:54 Ingo Molnar
  0 siblings, 0 replies; 539+ messages in thread
From: Ingo Molnar @ 2017-06-02  6:54 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Thomas Gleixner, H. Peter Anvin, Peter Zijlstra,
	Andrew Morton

Linus,

Please pull the latest x86-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus

   # HEAD: c08d517480ea342cc43acdacc5cf4a795e18151d Revert "x86/PAT: Fix Xorg regression on CPUs that don't support PAT"

Misc fixes:

 - revert a broken PAT commit that broke a number of systems,

 - fix two preemptability warnings/bugs that can trigger under certain 
   circumstances, in the debug code and in the microcode loader.

 Thanks,

	Ingo

------------------>
Borislav Petkov (2):
      x86/microcode/AMD: Change load_microcode_amd()'s param to bool to fix preemptibility bug
      x86/debug/32: Convert a smp_processor_id() call to raw to avoid DEBUG_PREEMPT warning

Ingo Molnar (1):
      Revert "x86/PAT: Fix Xorg regression on CPUs that don't support PAT"


 arch/x86/kernel/cpu/microcode/amd.c | 16 ++++++++--------
 arch/x86/kernel/process_32.c        |  2 +-
 arch/x86/mm/pat.c                   |  9 +++------
 3 files changed, 12 insertions(+), 15 deletions(-)

diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c
index 45db4d2ebd01..e9f4d762aa5b 100644
--- a/arch/x86/kernel/cpu/microcode/amd.c
+++ b/arch/x86/kernel/cpu/microcode/amd.c
@@ -320,7 +320,7 @@ void load_ucode_amd_ap(unsigned int cpuid_1_eax)
 }
 
 static enum ucode_state
-load_microcode_amd(int cpu, u8 family, const u8 *data, size_t size);
+load_microcode_amd(bool save, u8 family, const u8 *data, size_t size);
 
 int __init save_microcode_in_initrd_amd(unsigned int cpuid_1_eax)
 {
@@ -338,8 +338,7 @@ int __init save_microcode_in_initrd_amd(unsigned int cpuid_1_eax)
 	if (!desc.mc)
 		return -EINVAL;
 
-	ret = load_microcode_amd(smp_processor_id(), x86_family(cpuid_1_eax),
-				 desc.data, desc.size);
+	ret = load_microcode_amd(true, x86_family(cpuid_1_eax), desc.data, desc.size);
 	if (ret != UCODE_OK)
 		return -EINVAL;
 
@@ -675,7 +674,7 @@ static enum ucode_state __load_microcode_amd(u8 family, const u8 *data,
 }
 
 static enum ucode_state
-load_microcode_amd(int cpu, u8 family, const u8 *data, size_t size)
+load_microcode_amd(bool save, u8 family, const u8 *data, size_t size)
 {
 	enum ucode_state ret;
 
@@ -689,8 +688,8 @@ load_microcode_amd(int cpu, u8 family, const u8 *data, size_t size)
 
 #ifdef CONFIG_X86_32
 	/* save BSP's matching patch for early load */
-	if (cpu_data(cpu).cpu_index == boot_cpu_data.cpu_index) {
-		struct ucode_patch *p = find_patch(cpu);
+	if (save) {
+		struct ucode_patch *p = find_patch(0);
 		if (p) {
 			memset(amd_ucode_patch, 0, PATCH_MAX_SIZE);
 			memcpy(amd_ucode_patch, p->data, min_t(u32, ksize(p->data),
@@ -722,11 +721,12 @@ static enum ucode_state request_microcode_amd(int cpu, struct device *device,
 {
 	char fw_name[36] = "amd-ucode/microcode_amd.bin";
 	struct cpuinfo_x86 *c = &cpu_data(cpu);
+	bool bsp = c->cpu_index == boot_cpu_data.cpu_index;
 	enum ucode_state ret = UCODE_NFOUND;
 	const struct firmware *fw;
 
 	/* reload ucode container only on the boot cpu */
-	if (!refresh_fw || c->cpu_index != boot_cpu_data.cpu_index)
+	if (!refresh_fw || !bsp)
 		return UCODE_OK;
 
 	if (c->x86 >= 0x15)
@@ -743,7 +743,7 @@ static enum ucode_state request_microcode_amd(int cpu, struct device *device,
 		goto fw_release;
 	}
 
-	ret = load_microcode_amd(cpu, c->x86, fw->data, fw->size);
+	ret = load_microcode_amd(bsp, c->x86, fw->data, fw->size);
 
  fw_release:
 	release_firmware(fw);
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index ff40e74c9181..ffeae818aa7a 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -78,7 +78,7 @@ void __show_regs(struct pt_regs *regs, int all)
 
 	printk(KERN_DEFAULT "EIP: %pS\n", (void *)regs->ip);
 	printk(KERN_DEFAULT "EFLAGS: %08lx CPU: %d\n", regs->flags,
-		smp_processor_id());
+		raw_smp_processor_id());
 
 	printk(KERN_DEFAULT "EAX: %08lx EBX: %08lx ECX: %08lx EDX: %08lx\n",
 		regs->ax, regs->bx, regs->cx, regs->dx);
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index 83a59a67757a..9b78685b66e6 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -65,11 +65,9 @@ static int __init nopat(char *str)
 }
 early_param("nopat", nopat);
 
-static bool __read_mostly __pat_initialized = false;
-
 bool pat_enabled(void)
 {
-	return __pat_initialized;
+	return !!__pat_enabled;
 }
 EXPORT_SYMBOL_GPL(pat_enabled);
 
@@ -227,14 +225,13 @@ static void pat_bsp_init(u64 pat)
 	}
 
 	wrmsrl(MSR_IA32_CR_PAT, pat);
-	__pat_initialized = true;
 
 	__init_cache_modes(pat);
 }
 
 static void pat_ap_init(u64 pat)
 {
-	if (!this_cpu_has(X86_FEATURE_PAT)) {
+	if (!boot_cpu_has(X86_FEATURE_PAT)) {
 		/*
 		 * If this happens we are on a secondary CPU, but switched to
 		 * PAT on the boot CPU. We have no way to undo PAT.
@@ -309,7 +306,7 @@ void pat_init(void)
 	u64 pat;
 	struct cpuinfo_x86 *c = &boot_cpu_data;
 
-	if (!__pat_enabled) {
+	if (!pat_enabled()) {
 		init_cache_modes();
 		return;
 	}

^ permalink raw reply	[flat|nested] 539+ messages in thread

* [GIT PULL] x86 fixes
@ 2017-05-12  7:39 Ingo Molnar
  0 siblings, 0 replies; 539+ messages in thread
From: Ingo Molnar @ 2017-05-12  7:39 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Thomas Gleixner, H. Peter Anvin, Peter Zijlstra,
	Andrew Morton

Linus,

Please pull the latest x86-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus

   # HEAD: fb8fb46c56289b3f34b5d90a4ec65e9e4e4544a5 x86/intel_rdt: Fix a typo in Documentation

It's mostly misc fixes:

 - two boot crash fixes
 - unwinder fixes
 - kexec related kernel direct mappings enhancements/fixes
 - more Clang support quirks
 - minor cleanups
 - Documentation fixes

 Thanks,

	Ingo

------------------>
Andy Lutomirski (1):
      x86/boot/32: Fix UP boot on Quark and possibly other platforms

Baoquan He (1):
      x86/mm: Fix boot crash caused by incorrect loop count calculation in sync_global_pgds()

Colin Ian King (1):
      x86/microcode/AMD: Remove redundant NULL check on mc

Josh Poimboeuf (1):
      x86/asm: Don't use RBP as a temporary register in csum_partial_copy_generic()

Kees Cook (1):
      x86/boot: Declare error() as noreturn

Laura Abbott (1):
      x86/mm/32: Set the '__vmalloc_start_set' flag in initmem_init()

Matthias Kaehlcke (1):
      x86/mm/kaslr: Use the _ASM_MUL macro for multiplication to work around Clang incompatibility

Nick Desaulniers (1):
      x86/build: Don't add -maccumulate-outgoing-args w/o compiler support

Xiaochen Shen (1):
      x86/intel_rdt: Fix a typo in Documentation

Xunlei Pang (2):
      x86/mm: Add support for gbpages to kernel_ident_mapping_init()
      x86/kexec/64: Use gbpages for identity mappings if available


 Documentation/x86/intel_rdt_ui.txt   |  2 +-
 arch/x86/Makefile                    |  3 ++-
 arch/x86/boot/compressed/error.h     |  4 +++-
 arch/x86/boot/compressed/pagetable.c |  2 +-
 arch/x86/include/asm/asm.h           |  1 +
 arch/x86/include/asm/init.h          |  3 ++-
 arch/x86/kernel/cpu/microcode/amd.c  |  2 --
 arch/x86/kernel/machine_kexec_64.c   |  6 +++++-
 arch/x86/kernel/setup.c              | 15 +++++++++++++++
 arch/x86/kernel/setup_percpu.c       | 10 +++++-----
 arch/x86/lib/csum-copy_64.S          | 12 ++++++------
 arch/x86/lib/kaslr.c                 |  3 ++-
 arch/x86/mm/ident_map.c              | 14 +++++++++++++-
 arch/x86/mm/init_64.c                | 12 ++++++------
 arch/x86/mm/numa_32.c                |  1 +
 arch/x86/power/hibernate_64.c        |  2 +-
 16 files changed, 64 insertions(+), 28 deletions(-)

diff --git a/Documentation/x86/intel_rdt_ui.txt b/Documentation/x86/intel_rdt_ui.txt
index 0f6d8477b66c..c491a1b82de2 100644
--- a/Documentation/x86/intel_rdt_ui.txt
+++ b/Documentation/x86/intel_rdt_ui.txt
@@ -295,7 +295,7 @@ kernel and the tasks running there get 50% of the cache. They should
 also get 50% of memory bandwidth assuming that the cores 4-7 are SMT
 siblings and only the real time threads are scheduled on the cores 4-7.
 
-# echo C0 > p0/cpus
+# echo F0 > p0/cpus
 
 4) Locking between applications
 
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 4430dd489620..5851411e60fb 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -179,7 +179,8 @@ ifdef CONFIG_JUMP_LABEL
 endif
 
 ifeq ($(ACCUMULATE_OUTGOING_ARGS), 1)
-	KBUILD_CFLAGS += -maccumulate-outgoing-args
+	# This compiler flag is not supported by Clang:
+	KBUILD_CFLAGS += $(call cc-option,-maccumulate-outgoing-args,)
 endif
 
 # Stackpointer is addressed different for 32 bit and 64 bit x86
diff --git a/arch/x86/boot/compressed/error.h b/arch/x86/boot/compressed/error.h
index 2e59dac07f9e..d732e608e3af 100644
--- a/arch/x86/boot/compressed/error.h
+++ b/arch/x86/boot/compressed/error.h
@@ -1,7 +1,9 @@
 #ifndef BOOT_COMPRESSED_ERROR_H
 #define BOOT_COMPRESSED_ERROR_H
 
+#include <linux/compiler.h>
+
 void warn(char *m);
-void error(char *m);
+void error(char *m) __noreturn;
 
 #endif /* BOOT_COMPRESSED_ERROR_H */
diff --git a/arch/x86/boot/compressed/pagetable.c b/arch/x86/boot/compressed/pagetable.c
index 56589d0a804b..1d78f1739087 100644
--- a/arch/x86/boot/compressed/pagetable.c
+++ b/arch/x86/boot/compressed/pagetable.c
@@ -70,7 +70,7 @@ static unsigned long level4p;
  * Due to relocation, pointers must be assigned at run time not build time.
  */
 static struct x86_mapping_info mapping_info = {
-	.pmd_flag       = __PAGE_KERNEL_LARGE_EXEC,
+	.page_flag       = __PAGE_KERNEL_LARGE_EXEC,
 };
 
 /* Locates and clears a region for a new top level page table. */
diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h
index 7acb51c49fec..7a9df3beb89b 100644
--- a/arch/x86/include/asm/asm.h
+++ b/arch/x86/include/asm/asm.h
@@ -32,6 +32,7 @@
 #define _ASM_ADD	__ASM_SIZE(add)
 #define _ASM_SUB	__ASM_SIZE(sub)
 #define _ASM_XADD	__ASM_SIZE(xadd)
+#define _ASM_MUL	__ASM_SIZE(mul)
 
 #define _ASM_AX		__ASM_REG(ax)
 #define _ASM_BX		__ASM_REG(bx)
diff --git a/arch/x86/include/asm/init.h b/arch/x86/include/asm/init.h
index 737da62bfeb0..474eb8c66fee 100644
--- a/arch/x86/include/asm/init.h
+++ b/arch/x86/include/asm/init.h
@@ -4,8 +4,9 @@
 struct x86_mapping_info {
 	void *(*alloc_pgt_page)(void *); /* allocate buf for page table */
 	void *context;			 /* context for alloc_pgt_page */
-	unsigned long pmd_flag;		 /* page flag for PMD entry */
+	unsigned long page_flag;	 /* page flag for PMD or PUD entry */
 	unsigned long offset;		 /* ident mapping offset */
+	bool direct_gbpages;		 /* PUD level 1GB page support */
 };
 
 int kernel_ident_mapping_init(struct x86_mapping_info *info, pgd_t *pgd_page,
diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c
index 7889ae492af0..1d38e53c2d5c 100644
--- a/arch/x86/kernel/cpu/microcode/amd.c
+++ b/arch/x86/kernel/cpu/microcode/amd.c
@@ -352,8 +352,6 @@ void reload_ucode_amd(void)
 	u32 rev, dummy;
 
 	mc = (struct microcode_amd *)amd_ucode_patch;
-	if (!mc)
-		return;
 
 	rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy);
 
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index 085c3b300d32..c25d277d7d7e 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -113,7 +113,7 @@ static int init_pgtable(struct kimage *image, unsigned long start_pgtable)
 	struct x86_mapping_info info = {
 		.alloc_pgt_page	= alloc_pgt_page,
 		.context	= image,
-		.pmd_flag	= __PAGE_KERNEL_LARGE_EXEC,
+		.page_flag	= __PAGE_KERNEL_LARGE_EXEC,
 	};
 	unsigned long mstart, mend;
 	pgd_t *level4p;
@@ -122,6 +122,10 @@ static int init_pgtable(struct kimage *image, unsigned long start_pgtable)
 
 	level4p = (pgd_t *)__va(start_pgtable);
 	clear_page(level4p);
+
+	if (direct_gbpages)
+		info.direct_gbpages = true;
+
 	for (i = 0; i < nr_pfn_mapped; i++) {
 		mstart = pfn_mapped[i].start << PAGE_SHIFT;
 		mend   = pfn_mapped[i].end << PAGE_SHIFT;
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 603a1669a2ec..0b4d3c686b1e 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -1225,6 +1225,21 @@ void __init setup_arch(char **cmdline_p)
 
 	kasan_init();
 
+#ifdef CONFIG_X86_32
+	/* sync back kernel address range */
+	clone_pgd_range(initial_page_table + KERNEL_PGD_BOUNDARY,
+			swapper_pg_dir     + KERNEL_PGD_BOUNDARY,
+			KERNEL_PGD_PTRS);
+
+	/*
+	 * sync back low identity map too.  It is used for example
+	 * in the 32-bit EFI stub.
+	 */
+	clone_pgd_range(initial_page_table,
+			swapper_pg_dir     + KERNEL_PGD_BOUNDARY,
+			min(KERNEL_PGD_PTRS, KERNEL_PGD_BOUNDARY));
+#endif
+
 	tboot_probe();
 
 	map_vsyscall();
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index bb1e8cc0bc84..10edd1e69a68 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -291,11 +291,11 @@ void __init setup_per_cpu_areas(void)
 
 #ifdef CONFIG_X86_32
 	/*
-	 * Sync back kernel address range.  We want to make sure that
-	 * all kernel mappings, including percpu mappings, are available
-	 * in the smpboot asm.  We can't reliably pick up percpu
-	 * mappings using vmalloc_fault(), because exception dispatch
-	 * needs percpu data.
+	 * Sync back kernel address range again.  We already did this in
+	 * setup_arch(), but percpu data also needs to be available in
+	 * the smpboot asm.  We can't reliably pick up percpu mappings
+	 * using vmalloc_fault(), because exception dispatch needs
+	 * percpu data.
 	 */
 	clone_pgd_range(initial_page_table + KERNEL_PGD_BOUNDARY,
 			swapper_pg_dir     + KERNEL_PGD_BOUNDARY,
diff --git a/arch/x86/lib/csum-copy_64.S b/arch/x86/lib/csum-copy_64.S
index 7e48807b2fa1..45a53dfe1859 100644
--- a/arch/x86/lib/csum-copy_64.S
+++ b/arch/x86/lib/csum-copy_64.S
@@ -55,7 +55,7 @@ ENTRY(csum_partial_copy_generic)
 	movq  %r12, 3*8(%rsp)
 	movq  %r14, 4*8(%rsp)
 	movq  %r13, 5*8(%rsp)
-	movq  %rbp, 6*8(%rsp)
+	movq  %r15, 6*8(%rsp)
 
 	movq  %r8, (%rsp)
 	movq  %r9, 1*8(%rsp)
@@ -74,7 +74,7 @@ ENTRY(csum_partial_copy_generic)
 	/* main loop. clear in 64 byte blocks */
 	/* r9: zero, r8: temp2, rbx: temp1, rax: sum, rcx: saved length */
 	/* r11:	temp3, rdx: temp4, r12 loopcnt */
-	/* r10:	temp5, rbp: temp6, r14 temp7, r13 temp8 */
+	/* r10:	temp5, r15: temp6, r14 temp7, r13 temp8 */
 	.p2align 4
 .Lloop:
 	source
@@ -89,7 +89,7 @@ ENTRY(csum_partial_copy_generic)
 	source
 	movq  32(%rdi), %r10
 	source
-	movq  40(%rdi), %rbp
+	movq  40(%rdi), %r15
 	source
 	movq  48(%rdi), %r14
 	source
@@ -103,7 +103,7 @@ ENTRY(csum_partial_copy_generic)
 	adcq  %r11, %rax
 	adcq  %rdx, %rax
 	adcq  %r10, %rax
-	adcq  %rbp, %rax
+	adcq  %r15, %rax
 	adcq  %r14, %rax
 	adcq  %r13, %rax
 
@@ -121,7 +121,7 @@ ENTRY(csum_partial_copy_generic)
 	dest
 	movq %r10, 32(%rsi)
 	dest
-	movq %rbp, 40(%rsi)
+	movq %r15, 40(%rsi)
 	dest
 	movq %r14, 48(%rsi)
 	dest
@@ -203,7 +203,7 @@ ENTRY(csum_partial_copy_generic)
 	movq 3*8(%rsp), %r12
 	movq 4*8(%rsp), %r14
 	movq 5*8(%rsp), %r13
-	movq 6*8(%rsp), %rbp
+	movq 6*8(%rsp), %r15
 	addq $7*8, %rsp
 	ret
 
diff --git a/arch/x86/lib/kaslr.c b/arch/x86/lib/kaslr.c
index 5761a4f19455..ab2d1d73e9e7 100644
--- a/arch/x86/lib/kaslr.c
+++ b/arch/x86/lib/kaslr.c
@@ -5,6 +5,7 @@
  * kernel starts. This file is included in the compressed kernel and
  * normally linked in the regular.
  */
+#include <asm/asm.h>
 #include <asm/kaslr.h>
 #include <asm/msr.h>
 #include <asm/archrandom.h>
@@ -79,7 +80,7 @@ unsigned long kaslr_get_random_long(const char *purpose)
 	}
 
 	/* Circular multiply for better bit diffusion */
-	asm("mul %3"
+	asm(_ASM_MUL "%3"
 	    : "=a" (random), "=d" (raw)
 	    : "a" (random), "rm" (mix_const));
 	random += raw;
diff --git a/arch/x86/mm/ident_map.c b/arch/x86/mm/ident_map.c
index 04210a29dd60..adab1595f4bd 100644
--- a/arch/x86/mm/ident_map.c
+++ b/arch/x86/mm/ident_map.c
@@ -13,7 +13,7 @@ static void ident_pmd_init(struct x86_mapping_info *info, pmd_t *pmd_page,
 		if (pmd_present(*pmd))
 			continue;
 
-		set_pmd(pmd, __pmd((addr - info->offset) | info->pmd_flag));
+		set_pmd(pmd, __pmd((addr - info->offset) | info->page_flag));
 	}
 }
 
@@ -30,6 +30,18 @@ static int ident_pud_init(struct x86_mapping_info *info, pud_t *pud_page,
 		if (next > end)
 			next = end;
 
+		if (info->direct_gbpages) {
+			pud_t pudval;
+
+			if (pud_present(*pud))
+				continue;
+
+			addr &= PUD_MASK;
+			pudval = __pud((addr - info->offset) | info->page_flag);
+			set_pud(pud, pudval);
+			continue;
+		}
+
 		if (pud_present(*pud)) {
 			pmd = pmd_offset(pud, 0);
 			ident_pmd_init(info, pmd, addr, next);
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 745e5e183169..97fe88749e18 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -94,10 +94,10 @@ __setup("noexec32=", nonx32_setup);
  */
 void sync_global_pgds(unsigned long start, unsigned long end)
 {
-	unsigned long address;
+	unsigned long addr;
 
-	for (address = start; address <= end; address += PGDIR_SIZE) {
-		pgd_t *pgd_ref = pgd_offset_k(address);
+	for (addr = start; addr <= end; addr = ALIGN(addr + 1, PGDIR_SIZE)) {
+		pgd_t *pgd_ref = pgd_offset_k(addr);
 		const p4d_t *p4d_ref;
 		struct page *page;
 
@@ -106,7 +106,7 @@ void sync_global_pgds(unsigned long start, unsigned long end)
 		 * handle synchonization on p4d level.
 		 */
 		BUILD_BUG_ON(pgd_none(*pgd_ref));
-		p4d_ref = p4d_offset(pgd_ref, address);
+		p4d_ref = p4d_offset(pgd_ref, addr);
 
 		if (p4d_none(*p4d_ref))
 			continue;
@@ -117,8 +117,8 @@ void sync_global_pgds(unsigned long start, unsigned long end)
 			p4d_t *p4d;
 			spinlock_t *pgt_lock;
 
-			pgd = (pgd_t *)page_address(page) + pgd_index(address);
-			p4d = p4d_offset(pgd, address);
+			pgd = (pgd_t *)page_address(page) + pgd_index(addr);
+			p4d = p4d_offset(pgd, addr);
 			/* the pgt_lock only for Xen */
 			pgt_lock = &pgd_page_get_mm(page)->page_table_lock;
 			spin_lock(pgt_lock);
diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c
index 6b7ce6279133..aca6295350f3 100644
--- a/arch/x86/mm/numa_32.c
+++ b/arch/x86/mm/numa_32.c
@@ -100,5 +100,6 @@ void __init initmem_init(void)
 	printk(KERN_DEBUG "High memory starts at vaddr %08lx\n",
 			(ulong) pfn_to_kaddr(highstart_pfn));
 
+	__vmalloc_start_set = true;
 	setup_bootmem_allocator();
 }
diff --git a/arch/x86/power/hibernate_64.c b/arch/x86/power/hibernate_64.c
index 6a61194ffd58..a6e21fee22ea 100644
--- a/arch/x86/power/hibernate_64.c
+++ b/arch/x86/power/hibernate_64.c
@@ -104,7 +104,7 @@ static int set_up_temporary_mappings(void)
 {
 	struct x86_mapping_info info = {
 		.alloc_pgt_page	= alloc_pgt_page,
-		.pmd_flag	= __PAGE_KERNEL_LARGE_EXEC,
+		.page_flag	= __PAGE_KERNEL_LARGE_EXEC,
 		.offset		= __PAGE_OFFSET,
 	};
 	unsigned long mstart, mend;

^ permalink raw reply	[flat|nested] 539+ messages in thread

* [GIT PULL] x86 fixes
@ 2017-03-07 20:40 Ingo Molnar
  0 siblings, 0 replies; 539+ messages in thread
From: Ingo Molnar @ 2017-03-07 20:40 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Thomas Gleixner, H. Peter Anvin, Peter Zijlstra,
	Andrew Morton

Linus,

Please pull the latest x86-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus

   # HEAD: f2853308b6409b97799b0beceacd9da43a82fe43 x86/build/x86_64_defconfig: Enable CONFIG_R8169

Misc fixes and minor updates all over the place:

- An SGI/UV fix,
- a defconfig update,
- a build warning fix,
- move the boot_params file to the arch location in debugfs,
- a pkeys fix,
- selftests fix,
- boot message fixes,
- sparse fixes,
- a resume warning fix,
- ioapic hotplug fixes,
- reboot quirks,
- ... plus various minor cleanups.

  out-of-topic modifications in x86-urgent-for-linus:
  -----------------------------------------------------
  drivers/acpi/internal.h            # f2ae5da72617: x86/ioapic: Split IOAPIC hot
  drivers/acpi/ioapic.c              # f2ae5da72617: x86/ioapic: Split IOAPIC hot
  drivers/acpi/pci_root.c            # f2ae5da72617: x86/ioapic: Split IOAPIC hot
  tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c# 940b2f2fd963: x86/events: Remove last remn
  tools/testing/selftests/x86/fsgsbase.c# 2a4d0c627f53: x86/selftests: Add clobbers 
  tools/testing/selftests/x86/ldt_gdt.c# 2a4d0c627f53: x86/selftests: Add clobbers 
  tools/testing/selftests/x86/ptrace_syscall.c# 2a4d0c627f53: x86/selftests: Add clobbers 
  tools/testing/selftests/x86/single_step_syscall.c# 2a4d0c627f53: x86/selftests: Add clobbers 

 Thanks,

	Ingo

------------------>
Andrew Banman (1):
      x86/platform/uv/BAU: Fix HUB errors by remove initial write to sw-ack register

Andy Shevchenko (1):
      x86/build/x86_64_defconfig: Enable CONFIG_R8169

Arnd Bergmann (1):
      x86/hyperv: Hide unused label

Borislav Petkov (3):
      x86/kdebugfs: Move boot params hierarchy under (debugfs)/x86/
      x86/events: Remove last remnants of old filenames
      x86/boot: Correct setup_header.start_sys name

Dave Hansen (1):
      x86/pkeys: Check against max pkey to avoid overflows

Dmitry Safonov (1):
      x86/selftests: Add clobbers for int80 on x86_64

Dou Liyang (2):
      x86/apic: Fix a warning message in logical CPU IDs allocation
      x86/apic: Simplify enable_IR_x2apic(), remove try_to_enable_IR()

Masanari Iida (2):
      x86/vmware: Remove duplicate inclusion of asm/timer.h
      x86/intel_rdt: Remove duplicate inclusion of linux/cpu.h

Matjaz Hegedic (2):
      x86/reboot/quirks: Add ASUS EeeBook X205TA reboot quirk
      x86/reboot/quirks: Add ASUS EeeBook X205TA/W reboot quirk

Rui Wang (2):
      x86/PCI: Implement pcibios_release_device to release IRQ from IOAPIC
      x86/ioapic: Split IOAPIC hot-removal into two steps

Thomas Gleixner (1):
      x86/hpet: Prevent might sleep splat on resume

Tobin C. Harding (2):
      x86/purgatory: Make functions and variables static
      x86/purgatory: Fix sparse warning, symbol not declared


 arch/x86/configs/x86_64_defconfig                  |  1 +
 arch/x86/events/amd/core.c                         |  2 +-
 arch/x86/events/intel/cstate.c                     |  2 +-
 arch/x86/events/intel/rapl.c                       |  2 +-
 arch/x86/events/intel/uncore.h                     |  6 +++---
 arch/x86/hyperv/hv_init.c                          |  2 +-
 arch/x86/include/asm/pkeys.h                       | 15 ++++++++------
 arch/x86/include/uapi/asm/bootparam.h              |  2 +-
 arch/x86/kernel/apic/apic.c                        | 23 +++++++---------------
 arch/x86/kernel/cpu/intel_rdt_rdtgroup.c           |  1 -
 arch/x86/kernel/cpu/vmware.c                       |  1 -
 arch/x86/kernel/hpet.c                             |  2 +-
 arch/x86/kernel/kdebugfs.c                         |  2 +-
 arch/x86/kernel/reboot.c                           | 16 +++++++++++++++
 arch/x86/pci/common.c                              |  9 +++++++++
 arch/x86/platform/uv/tlb_uv.c                      |  1 -
 arch/x86/purgatory/purgatory.c                     | 11 ++++++-----
 arch/x86/purgatory/purgatory.h                     |  8 ++++++++
 arch/x86/purgatory/setup-x86_64.S                  |  1 +
 drivers/acpi/internal.h                            |  2 ++
 drivers/acpi/ioapic.c                              | 22 +++++++++++++++------
 drivers/acpi/pci_root.c                            |  4 ++--
 .../util/intel-pt-decoder/intel-pt-insn-decoder.c  |  2 +-
 tools/testing/selftests/x86/fsgsbase.c             |  2 +-
 tools/testing/selftests/x86/ldt_gdt.c              | 16 ++++++++++-----
 tools/testing/selftests/x86/ptrace_syscall.c       |  3 ++-
 tools/testing/selftests/x86/single_step_syscall.c  |  5 ++++-
 27 files changed, 106 insertions(+), 57 deletions(-)
 create mode 100644 arch/x86/purgatory/purgatory.h

diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig
index 7ef4a099defc..6205d3b81e6d 100644
--- a/arch/x86/configs/x86_64_defconfig
+++ b/arch/x86/configs/x86_64_defconfig
@@ -176,6 +176,7 @@ CONFIG_E1000E=y
 CONFIG_SKY2=y
 CONFIG_FORCEDETH=y
 CONFIG_8139TOO=y
+CONFIG_R8169=y
 CONFIG_FDDI=y
 CONFIG_INPUT_POLLDEV=y
 # CONFIG_INPUT_MOUSEDEV_PSAUX is not set
diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c
index afb222b63cae..c84584bb9402 100644
--- a/arch/x86/events/amd/core.c
+++ b/arch/x86/events/amd/core.c
@@ -604,7 +604,7 @@ amd_get_event_constraints_f15h(struct cpu_hw_events *cpuc, int idx,
 			return &amd_f15_PMC20;
 		}
 	case AMD_EVENT_NB:
-		/* moved to perf_event_amd_uncore.c */
+		/* moved to uncore.c */
 		return &emptyconstraint;
 	default:
 		return &emptyconstraint;
diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c
index aff4b5b69d40..238ae3248ba5 100644
--- a/arch/x86/events/intel/cstate.c
+++ b/arch/x86/events/intel/cstate.c
@@ -1,5 +1,5 @@
 /*
- * perf_event_intel_cstate.c: support cstate residency counters
+ * Support cstate residency counters
  *
  * Copyright (C) 2015, Intel Corp.
  * Author: Kan Liang (kan.liang@intel.com)
diff --git a/arch/x86/events/intel/rapl.c b/arch/x86/events/intel/rapl.c
index 22054ca49026..9d05c7e67f60 100644
--- a/arch/x86/events/intel/rapl.c
+++ b/arch/x86/events/intel/rapl.c
@@ -1,5 +1,5 @@
 /*
- * perf_event_intel_rapl.c: support Intel RAPL energy consumption counters
+ * Support Intel RAPL energy consumption counters
  * Copyright (C) 2013 Google, Inc., Stephane Eranian
  *
  * Intel RAPL interface is specified in the IA-32 Manual Vol3b
diff --git a/arch/x86/events/intel/uncore.h b/arch/x86/events/intel/uncore.h
index ad986c1e29bc..df5989f27b1b 100644
--- a/arch/x86/events/intel/uncore.h
+++ b/arch/x86/events/intel/uncore.h
@@ -360,7 +360,7 @@ extern struct list_head pci2phy_map_head;
 extern struct pci_extra_dev *uncore_extra_pci_dev;
 extern struct event_constraint uncore_constraint_empty;
 
-/* perf_event_intel_uncore_snb.c */
+/* uncore_snb.c */
 int snb_uncore_pci_init(void);
 int ivb_uncore_pci_init(void);
 int hsw_uncore_pci_init(void);
@@ -371,7 +371,7 @@ void nhm_uncore_cpu_init(void);
 void skl_uncore_cpu_init(void);
 int snb_pci2phy_map_init(int devid);
 
-/* perf_event_intel_uncore_snbep.c */
+/* uncore_snbep.c */
 int snbep_uncore_pci_init(void);
 void snbep_uncore_cpu_init(void);
 int ivbep_uncore_pci_init(void);
@@ -385,5 +385,5 @@ void knl_uncore_cpu_init(void);
 int skx_uncore_pci_init(void);
 void skx_uncore_cpu_init(void);
 
-/* perf_event_intel_uncore_nhmex.c */
+/* uncore_nhmex.c */
 void nhmex_uncore_cpu_init(void);
diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
index db64baf0e500..8bef70e7f3cc 100644
--- a/arch/x86/hyperv/hv_init.c
+++ b/arch/x86/hyperv/hv_init.c
@@ -158,13 +158,13 @@ void hyperv_init(void)
 		clocksource_register_hz(&hyperv_cs_tsc, NSEC_PER_SEC/100);
 		return;
 	}
+register_msr_cs:
 #endif
 	/*
 	 * For 32 bit guests just use the MSR based mechanism for reading
 	 * the partition counter.
 	 */
 
-register_msr_cs:
 	hyperv_cs = &hyperv_cs_msr;
 	if (ms_hyperv.features & HV_X64_MSR_TIME_REF_COUNT_AVAILABLE)
 		clocksource_register_hz(&hyperv_cs_msr, NSEC_PER_SEC/100);
diff --git a/arch/x86/include/asm/pkeys.h b/arch/x86/include/asm/pkeys.h
index 34684adb6899..b3b09b98896d 100644
--- a/arch/x86/include/asm/pkeys.h
+++ b/arch/x86/include/asm/pkeys.h
@@ -46,6 +46,15 @@ extern int __arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
 static inline
 bool mm_pkey_is_allocated(struct mm_struct *mm, int pkey)
 {
+	/*
+	 * "Allocated" pkeys are those that have been returned
+	 * from pkey_alloc().  pkey 0 is special, and never
+	 * returned from pkey_alloc().
+	 */
+	if (pkey <= 0)
+		return false;
+	if (pkey >= arch_max_pkey())
+		return false;
 	return mm_pkey_allocation_map(mm) & (1U << pkey);
 }
 
@@ -82,12 +91,6 @@ int mm_pkey_alloc(struct mm_struct *mm)
 static inline
 int mm_pkey_free(struct mm_struct *mm, int pkey)
 {
-	/*
-	 * pkey 0 is special, always allocated and can never
-	 * be freed.
-	 */
-	if (!pkey)
-		return -EINVAL;
 	if (!mm_pkey_is_allocated(mm, pkey))
 		return -EINVAL;
 
diff --git a/arch/x86/include/uapi/asm/bootparam.h b/arch/x86/include/uapi/asm/bootparam.h
index 5138dacf8bb8..07244ea16765 100644
--- a/arch/x86/include/uapi/asm/bootparam.h
+++ b/arch/x86/include/uapi/asm/bootparam.h
@@ -58,7 +58,7 @@ struct setup_header {
 	__u32	header;
 	__u16	version;
 	__u32	realmode_swtch;
-	__u16	start_sys;
+	__u16	start_sys_seg;
 	__u16	kernel_version;
 	__u8	type_of_loader;
 	__u8	loadflags;
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 4261b3282ad9..aee7deddabd0 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -1610,24 +1610,15 @@ static inline void try_to_enable_x2apic(int remap_mode) { }
 static inline void __x2apic_enable(void) { }
 #endif /* !CONFIG_X86_X2APIC */
 
-static int __init try_to_enable_IR(void)
-{
-#ifdef CONFIG_X86_IO_APIC
-	if (!x2apic_enabled() && skip_ioapic_setup) {
-		pr_info("Not enabling interrupt remapping due to skipped IO-APIC setup\n");
-		return -1;
-	}
-#endif
-	return irq_remapping_enable();
-}
-
 void __init enable_IR_x2apic(void)
 {
 	unsigned long flags;
 	int ret, ir_stat;
 
-	if (skip_ioapic_setup)
+	if (skip_ioapic_setup) {
+		pr_info("Not enabling interrupt remapping due to skipped IO-APIC setup\n");
 		return;
+	}
 
 	ir_stat = irq_remapping_prepare();
 	if (ir_stat < 0 && !x2apic_supported())
@@ -1645,7 +1636,7 @@ void __init enable_IR_x2apic(void)
 
 	/* If irq_remapping_prepare() succeeded, try to enable it */
 	if (ir_stat >= 0)
-		ir_stat = try_to_enable_IR();
+		ir_stat = irq_remapping_enable();
 	/* ir_stat contains the remap mode or an error code */
 	try_to_enable_x2apic(ir_stat);
 
@@ -2062,10 +2053,10 @@ static int allocate_logical_cpuid(int apicid)
 
 	/* Allocate a new cpuid. */
 	if (nr_logical_cpuids >= nr_cpu_ids) {
-		WARN_ONCE(1, "Only %d processors supported."
+		WARN_ONCE(1, "APIC: NR_CPUS/possible_cpus limit of %i reached. "
 			     "Processor %d/0x%x and the rest are ignored.\n",
-			     nr_cpu_ids - 1, nr_logical_cpuids, apicid);
-		return -1;
+			     nr_cpu_ids, nr_logical_cpuids, apicid);
+		return -EINVAL;
 	}
 
 	cpuid_to_apicid[nr_logical_cpuids] = apicid;
diff --git a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
index 8af04afdfcb9..759577d9d166 100644
--- a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
+++ b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
@@ -27,7 +27,6 @@
 #include <linux/seq_file.h>
 #include <linux/sched.h>
 #include <linux/slab.h>
-#include <linux/cpu.h>
 #include <linux/task_work.h>
 
 #include <uapi/linux/magic.h>
diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c
index 891f4dad7b2c..22403a28caf5 100644
--- a/arch/x86/kernel/cpu/vmware.c
+++ b/arch/x86/kernel/cpu/vmware.c
@@ -30,7 +30,6 @@
 #include <asm/hypervisor.h>
 #include <asm/timer.h>
 #include <asm/apic.h>
-#include <asm/timer.h>
 
 #undef pr_fmt
 #define pr_fmt(fmt)	"vmware: " fmt
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index dc6ba5bda9fc..89ff7af2de50 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -354,7 +354,7 @@ static int hpet_resume(struct clock_event_device *evt, int timer)
 
 		irq_domain_deactivate_irq(irq_get_irq_data(hdev->irq));
 		irq_domain_activate_irq(irq_get_irq_data(hdev->irq));
-		disable_irq(hdev->irq);
+		disable_hardirq(hdev->irq);
 		irq_set_affinity(hdev->irq, cpumask_of(hdev->cpu));
 		enable_irq(hdev->irq);
 	}
diff --git a/arch/x86/kernel/kdebugfs.c b/arch/x86/kernel/kdebugfs.c
index bdb83e431d89..38b64587b31b 100644
--- a/arch/x86/kernel/kdebugfs.c
+++ b/arch/x86/kernel/kdebugfs.c
@@ -167,7 +167,7 @@ static int __init boot_params_kdebugfs_init(void)
 	struct dentry *dbp, *version, *data;
 	int error = -ENOMEM;
 
-	dbp = debugfs_create_dir("boot_params", NULL);
+	dbp = debugfs_create_dir("boot_params", arch_debugfs_dir);
 	if (!dbp)
 		return -ENOMEM;
 
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index e244c19a2451..4194d6f9bb29 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -223,6 +223,22 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = {
 			DMI_MATCH(DMI_BOARD_NAME, "P4S800"),
 		},
 	},
+	{	/* Handle problems with rebooting on ASUS EeeBook X205TA */
+		.callback = set_acpi_reboot,
+		.ident = "ASUS EeeBook X205TA",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
+			DMI_MATCH(DMI_PRODUCT_NAME, "X205TAW"),
+		},
+	},
+	{	/* Handle problems with rebooting on ASUS EeeBook X205TAW */
+		.callback = set_acpi_reboot,
+		.ident = "ASUS EeeBook X205TAW",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
+			DMI_MATCH(DMI_PRODUCT_NAME, "X205TAW"),
+		},
+	},
 
 	/* Certec */
 	{       /* Handle problems with rebooting on Certec BPC600 */
diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c
index 0cb52ae0a8f0..190e718694b1 100644
--- a/arch/x86/pci/common.c
+++ b/arch/x86/pci/common.c
@@ -735,6 +735,15 @@ void pcibios_disable_device (struct pci_dev *dev)
 		pcibios_disable_irq(dev);
 }
 
+#ifdef CONFIG_ACPI_HOTPLUG_IOAPIC
+void pcibios_release_device(struct pci_dev *dev)
+{
+	if (atomic_dec_return(&dev->enable_cnt) >= 0)
+		pcibios_disable_device(dev);
+
+}
+#endif
+
 int pci_ext_cfg_avail(void)
 {
 	if (raw_pci_ext_ops)
diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c
index 766d4d3529a1..f25982cdff90 100644
--- a/arch/x86/platform/uv/tlb_uv.c
+++ b/arch/x86/platform/uv/tlb_uv.c
@@ -1847,7 +1847,6 @@ static void pq_init(int node, int pnode)
 
 	ops.write_payload_first(pnode, first);
 	ops.write_payload_last(pnode, last);
-	ops.write_g_sw_ack(pnode, 0xffffUL);
 
 	/* in effect, all msg_type's are set to MSG_NOOP */
 	memset(pqp, 0, sizeof(struct bau_pq_entry) * DEST_Q_SIZE);
diff --git a/arch/x86/purgatory/purgatory.c b/arch/x86/purgatory/purgatory.c
index 25e068ba3382..b6d5c8946e66 100644
--- a/arch/x86/purgatory/purgatory.c
+++ b/arch/x86/purgatory/purgatory.c
@@ -11,6 +11,7 @@
  */
 
 #include "sha256.h"
+#include "purgatory.h"
 #include "../boot/string.h"
 
 struct sha_region {
@@ -18,11 +19,11 @@ struct sha_region {
 	unsigned long len;
 };
 
-unsigned long backup_dest = 0;
-unsigned long backup_src = 0;
-unsigned long backup_sz = 0;
+static unsigned long backup_dest;
+static unsigned long backup_src;
+static unsigned long backup_sz;
 
-u8 sha256_digest[SHA256_DIGEST_SIZE] = { 0 };
+static u8 sha256_digest[SHA256_DIGEST_SIZE] = { 0 };
 
 struct sha_region sha_regions[16] = {};
 
@@ -39,7 +40,7 @@ static int copy_backup_region(void)
 	return 0;
 }
 
-int verify_sha256_digest(void)
+static int verify_sha256_digest(void)
 {
 	struct sha_region *ptr, *end;
 	u8 digest[SHA256_DIGEST_SIZE];
diff --git a/arch/x86/purgatory/purgatory.h b/arch/x86/purgatory/purgatory.h
new file mode 100644
index 000000000000..e2e365a6c192
--- /dev/null
+++ b/arch/x86/purgatory/purgatory.h
@@ -0,0 +1,8 @@
+#ifndef PURGATORY_H
+#define PURGATORY_H
+
+#ifndef __ASSEMBLY__
+extern void purgatory(void);
+#endif	/* __ASSEMBLY__ */
+
+#endif /* PURGATORY_H */
diff --git a/arch/x86/purgatory/setup-x86_64.S b/arch/x86/purgatory/setup-x86_64.S
index fe3c91ba1bd0..f90e9dfa90bb 100644
--- a/arch/x86/purgatory/setup-x86_64.S
+++ b/arch/x86/purgatory/setup-x86_64.S
@@ -9,6 +9,7 @@
  * This source code is licensed under the GNU General Public License,
  * Version 2.  See the file COPYING for more details.
  */
+#include "purgatory.h"
 
 	.text
 	.globl purgatory_start
diff --git a/drivers/acpi/internal.h b/drivers/acpi/internal.h
index 219b90bc0922..f15900132912 100644
--- a/drivers/acpi/internal.h
+++ b/drivers/acpi/internal.h
@@ -41,8 +41,10 @@ void acpi_gpe_apply_masked_gpes(void);
 void acpi_container_init(void);
 void acpi_memory_hotplug_init(void);
 #ifdef	CONFIG_ACPI_HOTPLUG_IOAPIC
+void pci_ioapic_remove(struct acpi_pci_root *root);
 int acpi_ioapic_remove(struct acpi_pci_root *root);
 #else
+static inline void pci_ioapic_remove(struct acpi_pci_root *root) { return; }
 static inline int acpi_ioapic_remove(struct acpi_pci_root *root) { return 0; }
 #endif
 #ifdef CONFIG_ACPI_DOCK
diff --git a/drivers/acpi/ioapic.c b/drivers/acpi/ioapic.c
index 6d7ce6e12aaa..1120dfd625b8 100644
--- a/drivers/acpi/ioapic.c
+++ b/drivers/acpi/ioapic.c
@@ -206,24 +206,34 @@ int acpi_ioapic_add(acpi_handle root_handle)
 	return ACPI_SUCCESS(status) && ACPI_SUCCESS(retval) ? 0 : -ENODEV;
 }
 
-int acpi_ioapic_remove(struct acpi_pci_root *root)
+void pci_ioapic_remove(struct acpi_pci_root *root)
 {
-	int retval = 0;
 	struct acpi_pci_ioapic *ioapic, *tmp;
 
 	mutex_lock(&ioapic_list_lock);
 	list_for_each_entry_safe(ioapic, tmp, &ioapic_list, list) {
 		if (root->device->handle != ioapic->root_handle)
 			continue;
-
-		if (acpi_unregister_ioapic(ioapic->handle, ioapic->gsi_base))
-			retval = -EBUSY;
-
 		if (ioapic->pdev) {
 			pci_release_region(ioapic->pdev, 0);
 			pci_disable_device(ioapic->pdev);
 			pci_dev_put(ioapic->pdev);
 		}
+	}
+	mutex_unlock(&ioapic_list_lock);
+}
+
+int acpi_ioapic_remove(struct acpi_pci_root *root)
+{
+	int retval = 0;
+	struct acpi_pci_ioapic *ioapic, *tmp;
+
+	mutex_lock(&ioapic_list_lock);
+	list_for_each_entry_safe(ioapic, tmp, &ioapic_list, list) {
+		if (root->device->handle != ioapic->root_handle)
+			continue;
+		if (acpi_unregister_ioapic(ioapic->handle, ioapic->gsi_base))
+			retval = -EBUSY;
 		if (ioapic->res.flags && ioapic->res.parent)
 			release_resource(&ioapic->res);
 		list_del(&ioapic->list);
diff --git a/drivers/acpi/pci_root.c b/drivers/acpi/pci_root.c
index bf601d4df8cf..919be0aa2578 100644
--- a/drivers/acpi/pci_root.c
+++ b/drivers/acpi/pci_root.c
@@ -648,12 +648,12 @@ static void acpi_pci_root_remove(struct acpi_device *device)
 
 	pci_stop_root_bus(root->bus);
 
-	WARN_ON(acpi_ioapic_remove(root));
-
+	pci_ioapic_remove(root);
 	device_set_run_wake(root->bus->bridge, false);
 	pci_acpi_remove_bus_pm_notifier(device);
 
 	pci_remove_root_bus(root->bus);
+	WARN_ON(acpi_ioapic_remove(root));
 
 	dmar_device_remove(device->handle);
 
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c
index 7913363bde5c..4f3c758d875d 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c
@@ -31,7 +31,7 @@
 #error Instruction buffer size too small
 #endif
 
-/* Based on branch_type() from perf_event_intel_lbr.c */
+/* Based on branch_type() from arch/x86/events/intel/lbr.c */
 static void intel_pt_insn_decoder(struct insn *insn,
 				  struct intel_pt_insn *intel_pt_insn)
 {
diff --git a/tools/testing/selftests/x86/fsgsbase.c b/tools/testing/selftests/x86/fsgsbase.c
index 5b2b4b3c634c..b4967d875236 100644
--- a/tools/testing/selftests/x86/fsgsbase.c
+++ b/tools/testing/selftests/x86/fsgsbase.c
@@ -245,7 +245,7 @@ void do_unexpected_base(void)
 		long ret;
 		asm volatile ("int $0x80"
 			      : "=a" (ret) : "a" (243), "b" (low_desc)
-			      : "flags");
+			      : "r8", "r9", "r10", "r11");
 		memcpy(&desc, low_desc, sizeof(desc));
 		munmap(low_desc, sizeof(desc));
 
diff --git a/tools/testing/selftests/x86/ldt_gdt.c b/tools/testing/selftests/x86/ldt_gdt.c
index 4af47079cf04..f6121612e769 100644
--- a/tools/testing/selftests/x86/ldt_gdt.c
+++ b/tools/testing/selftests/x86/ldt_gdt.c
@@ -45,6 +45,12 @@
 #define AR_DB			(1 << 22)
 #define AR_G			(1 << 23)
 
+#ifdef __x86_64__
+# define INT80_CLOBBERS "r8", "r9", "r10", "r11"
+#else
+# define INT80_CLOBBERS
+#endif
+
 static int nerrs;
 
 /* Points to an array of 1024 ints, each holding its own index. */
@@ -588,7 +594,7 @@ static int invoke_set_thread_area(void)
 	asm volatile ("int $0x80"
 		      : "=a" (ret), "+m" (low_user_desc) :
 			"a" (243), "b" (low_user_desc)
-		      : "flags");
+		      : INT80_CLOBBERS);
 	return ret;
 }
 
@@ -657,7 +663,7 @@ static void test_gdt_invalidation(void)
 			"+a" (eax)
 		      : "m" (low_user_desc_clear),
 			[arg1] "r" ((unsigned int)(unsigned long)low_user_desc_clear)
-		      : "flags");
+		      : INT80_CLOBBERS);
 
 	if (sel != 0) {
 		result = "FAIL";
@@ -688,7 +694,7 @@ static void test_gdt_invalidation(void)
 			"+a" (eax)
 		      : "m" (low_user_desc_clear),
 			[arg1] "r" ((unsigned int)(unsigned long)low_user_desc_clear)
-		      : "flags");
+		      : INT80_CLOBBERS);
 
 	if (sel != 0) {
 		result = "FAIL";
@@ -721,7 +727,7 @@ static void test_gdt_invalidation(void)
 			"+a" (eax)
 		      : "m" (low_user_desc_clear),
 			[arg1] "r" ((unsigned int)(unsigned long)low_user_desc_clear)
-		      : "flags");
+		      : INT80_CLOBBERS);
 
 #ifdef __x86_64__
 	syscall(SYS_arch_prctl, ARCH_GET_FS, &new_base);
@@ -774,7 +780,7 @@ static void test_gdt_invalidation(void)
 			"+a" (eax)
 		      : "m" (low_user_desc_clear),
 			[arg1] "r" ((unsigned int)(unsigned long)low_user_desc_clear)
-		      : "flags");
+		      : INT80_CLOBBERS);
 
 #ifdef __x86_64__
 	syscall(SYS_arch_prctl, ARCH_GET_GS, &new_base);
diff --git a/tools/testing/selftests/x86/ptrace_syscall.c b/tools/testing/selftests/x86/ptrace_syscall.c
index b037ce9cf116..eaea92439708 100644
--- a/tools/testing/selftests/x86/ptrace_syscall.c
+++ b/tools/testing/selftests/x86/ptrace_syscall.c
@@ -58,7 +58,8 @@ static void do_full_int80(struct syscall_args32 *args)
 	asm volatile ("int $0x80"
 		      : "+a" (args->nr),
 			"+b" (args->arg0), "+c" (args->arg1), "+d" (args->arg2),
-			"+S" (args->arg3), "+D" (args->arg4), "+r" (bp));
+			"+S" (args->arg3), "+D" (args->arg4), "+r" (bp)
+			: : "r8", "r9", "r10", "r11");
 	args->arg5 = bp;
 #else
 	sys32_helper(args, int80_and_ret);
diff --git a/tools/testing/selftests/x86/single_step_syscall.c b/tools/testing/selftests/x86/single_step_syscall.c
index 50c26358e8b7..a48da95c18fd 100644
--- a/tools/testing/selftests/x86/single_step_syscall.c
+++ b/tools/testing/selftests/x86/single_step_syscall.c
@@ -56,9 +56,11 @@ static volatile sig_atomic_t sig_traps;
 #ifdef __x86_64__
 # define REG_IP REG_RIP
 # define WIDTH "q"
+# define INT80_CLOBBERS "r8", "r9", "r10", "r11"
 #else
 # define REG_IP REG_EIP
 # define WIDTH "l"
+# define INT80_CLOBBERS
 #endif
 
 static unsigned long get_eflags(void)
@@ -140,7 +142,8 @@ int main()
 
 	printf("[RUN]\tSet TF and check int80\n");
 	set_eflags(get_eflags() | X86_EFLAGS_TF);
-	asm volatile ("int $0x80" : "=a" (tmp) : "a" (SYS_getpid));
+	asm volatile ("int $0x80" : "=a" (tmp) : "a" (SYS_getpid)
+			: INT80_CLOBBERS);
 	check_result();
 
 	/*

^ permalink raw reply	[flat|nested] 539+ messages in thread

* [GIT PULL] x86 fixes
@ 2017-02-28  8:08 Ingo Molnar
  0 siblings, 0 replies; 539+ messages in thread
From: Ingo Molnar @ 2017-02-28  8:08 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Thomas Gleixner, H. Peter Anvin, Peter Zijlstra,
	Andrew Morton

Linus,

Please pull the latest x86-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus

   # HEAD: 8312593a55941a0fae2b09731a4f91d87bd796db Merge branches 'x86/cache', 'x86/debug' and 'x86/irq' into x86/urgent

Two documentation updates, plus a debugging annotation fix.

 Thanks,

	Ingo

------------------>
Daniel Bristot de Oliveira (1):
      x86/irq, trace: Add __irq_entry annotation to x86's platform IRQ handlers

Marcelo Tosatti (1):
      Documentation, x86, resctrl: Recommend locking for resctrlfs

Xunlei Pang (1):
      x86/crash: Update the stale comment in reserve_crashkernel()


 Documentation/x86/intel_rdt_ui.txt       | 114 +++++++++++++++++++++++++++++++
 arch/x86/kernel/apic/apic.c              |   8 +--
 arch/x86/kernel/apic/vector.c            |   2 +-
 arch/x86/kernel/cpu/mcheck/mce_amd.c     |   4 +-
 arch/x86/kernel/cpu/mcheck/therm_throt.c |   6 +-
 arch/x86/kernel/cpu/mcheck/threshold.c   |   4 +-
 arch/x86/kernel/irq.c                    |   4 +-
 arch/x86/kernel/irq_work.c               |   5 +-
 arch/x86/kernel/setup.c                  |   4 +-
 arch/x86/kernel/smp.c                    |  15 ++--
 10 files changed, 144 insertions(+), 22 deletions(-)

diff --git a/Documentation/x86/intel_rdt_ui.txt b/Documentation/x86/intel_rdt_ui.txt
index d918d268cd72..51cf6fa5591f 100644
--- a/Documentation/x86/intel_rdt_ui.txt
+++ b/Documentation/x86/intel_rdt_ui.txt
@@ -212,3 +212,117 @@ Finally we move core 4-7 over to the new group and make sure that the
 kernel and the tasks running there get 50% of the cache.
 
 # echo C0 > p0/cpus
+
+4) Locking between applications
+
+Certain operations on the resctrl filesystem, composed of read/writes
+to/from multiple files, must be atomic.
+
+As an example, the allocation of an exclusive reservation of L3 cache
+involves:
+
+  1. Read the cbmmasks from each directory
+  2. Find a contiguous set of bits in the global CBM bitmask that is clear
+     in any of the directory cbmmasks
+  3. Create a new directory
+  4. Set the bits found in step 2 to the new directory "schemata" file
+
+If two applications attempt to allocate space concurrently then they can
+end up allocating the same bits so the reservations are shared instead of
+exclusive.
+
+To coordinate atomic operations on the resctrlfs and to avoid the problem
+above, the following locking procedure is recommended:
+
+Locking is based on flock, which is available in libc and also as a shell
+script command
+
+Write lock:
+
+ A) Take flock(LOCK_EX) on /sys/fs/resctrl
+ B) Read/write the directory structure.
+ C) funlock
+
+Read lock:
+
+ A) Take flock(LOCK_SH) on /sys/fs/resctrl
+ B) If success read the directory structure.
+ C) funlock
+
+Example with bash:
+
+# Atomically read directory structure
+$ flock -s /sys/fs/resctrl/ find /sys/fs/resctrl
+
+# Read directory contents and create new subdirectory
+
+$ cat create-dir.sh
+find /sys/fs/resctrl/ > output.txt
+mask = function-of(output.txt)
+mkdir /sys/fs/resctrl/newres/
+echo mask > /sys/fs/resctrl/newres/schemata
+
+$ flock /sys/fs/resctrl/ ./create-dir.sh
+
+Example with C:
+
+/*
+ * Example code do take advisory locks
+ * before accessing resctrl filesystem
+ */
+#include <sys/file.h>
+#include <stdlib.h>
+
+void resctrl_take_shared_lock(int fd)
+{
+	int ret;
+
+	/* take shared lock on resctrl filesystem */
+	ret = flock(fd, LOCK_SH);
+	if (ret) {
+		perror("flock");
+		exit(-1);
+	}
+}
+
+void resctrl_take_exclusive_lock(int fd)
+{
+	int ret;
+
+	/* release lock on resctrl filesystem */
+	ret = flock(fd, LOCK_EX);
+	if (ret) {
+		perror("flock");
+		exit(-1);
+	}
+}
+
+void resctrl_release_lock(int fd)
+{
+	int ret;
+
+	/* take shared lock on resctrl filesystem */
+	ret = flock(fd, LOCK_UN);
+	if (ret) {
+		perror("flock");
+		exit(-1);
+	}
+}
+
+void main(void)
+{
+	int fd, ret;
+
+	fd = open("/sys/fs/resctrl", O_DIRECTORY);
+	if (fd == -1) {
+		perror("open");
+		exit(-1);
+	}
+	resctrl_take_shared_lock(fd);
+	/* code to read directory contents */
+	resctrl_release_lock(fd);
+
+	resctrl_take_exclusive_lock(fd);
+	/* code to read and write directory contents */
+	resctrl_release_lock(fd);
+}
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 5b7e43eff139..30b122987906 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -1864,14 +1864,14 @@ static void __smp_spurious_interrupt(u8 vector)
 		"should never happen.\n", vector, smp_processor_id());
 }
 
-__visible void smp_spurious_interrupt(struct pt_regs *regs)
+__visible void __irq_entry smp_spurious_interrupt(struct pt_regs *regs)
 {
 	entering_irq();
 	__smp_spurious_interrupt(~regs->orig_ax);
 	exiting_irq();
 }
 
-__visible void smp_trace_spurious_interrupt(struct pt_regs *regs)
+__visible void __irq_entry smp_trace_spurious_interrupt(struct pt_regs *regs)
 {
 	u8 vector = ~regs->orig_ax;
 
@@ -1922,14 +1922,14 @@ static void __smp_error_interrupt(struct pt_regs *regs)
 
 }
 
-__visible void smp_error_interrupt(struct pt_regs *regs)
+__visible void __irq_entry smp_error_interrupt(struct pt_regs *regs)
 {
 	entering_irq();
 	__smp_error_interrupt(regs);
 	exiting_irq();
 }
 
-__visible void smp_trace_error_interrupt(struct pt_regs *regs)
+__visible void __irq_entry smp_trace_error_interrupt(struct pt_regs *regs)
 {
 	entering_irq();
 	trace_error_apic_entry(ERROR_APIC_VECTOR);
diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index 5d30c5e42bb1..f3557a1eb562 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -559,7 +559,7 @@ void send_cleanup_vector(struct irq_cfg *cfg)
 		__send_cleanup_vector(data);
 }
 
-asmlinkage __visible void smp_irq_move_cleanup_interrupt(void)
+asmlinkage __visible void __irq_entry smp_irq_move_cleanup_interrupt(void)
 {
 	unsigned vector, me;
 
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index a5fd137417a2..9e655292cf10 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -814,14 +814,14 @@ static inline void __smp_deferred_error_interrupt(void)
 	deferred_error_int_vector();
 }
 
-asmlinkage __visible void smp_deferred_error_interrupt(void)
+asmlinkage __visible void __irq_entry smp_deferred_error_interrupt(void)
 {
 	entering_irq();
 	__smp_deferred_error_interrupt();
 	exiting_ack_irq();
 }
 
-asmlinkage __visible void smp_trace_deferred_error_interrupt(void)
+asmlinkage __visible void __irq_entry smp_trace_deferred_error_interrupt(void)
 {
 	entering_irq();
 	trace_deferred_error_apic_entry(DEFERRED_ERROR_VECTOR);
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c
index 465aca8be009..772d9400930d 100644
--- a/arch/x86/kernel/cpu/mcheck/therm_throt.c
+++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c
@@ -404,14 +404,16 @@ static inline void __smp_thermal_interrupt(void)
 	smp_thermal_vector();
 }
 
-asmlinkage __visible void smp_thermal_interrupt(struct pt_regs *regs)
+asmlinkage __visible void __irq_entry
+smp_thermal_interrupt(struct pt_regs *regs)
 {
 	entering_irq();
 	__smp_thermal_interrupt();
 	exiting_ack_irq();
 }
 
-asmlinkage __visible void smp_trace_thermal_interrupt(struct pt_regs *regs)
+asmlinkage __visible void __irq_entry
+smp_trace_thermal_interrupt(struct pt_regs *regs)
 {
 	entering_irq();
 	trace_thermal_apic_entry(THERMAL_APIC_VECTOR);
diff --git a/arch/x86/kernel/cpu/mcheck/threshold.c b/arch/x86/kernel/cpu/mcheck/threshold.c
index 9beb092d68a5..bb0e75eed10a 100644
--- a/arch/x86/kernel/cpu/mcheck/threshold.c
+++ b/arch/x86/kernel/cpu/mcheck/threshold.c
@@ -23,14 +23,14 @@ static inline void __smp_threshold_interrupt(void)
 	mce_threshold_vector();
 }
 
-asmlinkage __visible void smp_threshold_interrupt(void)
+asmlinkage __visible void __irq_entry smp_threshold_interrupt(void)
 {
 	entering_irq();
 	__smp_threshold_interrupt();
 	exiting_ack_irq();
 }
 
-asmlinkage __visible void smp_trace_threshold_interrupt(void)
+asmlinkage __visible void __irq_entry smp_trace_threshold_interrupt(void)
 {
 	entering_irq();
 	trace_threshold_apic_entry(THRESHOLD_APIC_VECTOR);
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 7c6e9ffe4424..4d8183b5f113 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -264,7 +264,7 @@ void __smp_x86_platform_ipi(void)
 		x86_platform_ipi_callback();
 }
 
-__visible void smp_x86_platform_ipi(struct pt_regs *regs)
+__visible void __irq_entry smp_x86_platform_ipi(struct pt_regs *regs)
 {
 	struct pt_regs *old_regs = set_irq_regs(regs);
 
@@ -315,7 +315,7 @@ __visible void smp_kvm_posted_intr_wakeup_ipi(struct pt_regs *regs)
 }
 #endif
 
-__visible void smp_trace_x86_platform_ipi(struct pt_regs *regs)
+__visible void __irq_entry smp_trace_x86_platform_ipi(struct pt_regs *regs)
 {
 	struct pt_regs *old_regs = set_irq_regs(regs);
 
diff --git a/arch/x86/kernel/irq_work.c b/arch/x86/kernel/irq_work.c
index 3512ba607361..275487872be2 100644
--- a/arch/x86/kernel/irq_work.c
+++ b/arch/x86/kernel/irq_work.c
@@ -9,6 +9,7 @@
 #include <linux/hardirq.h>
 #include <asm/apic.h>
 #include <asm/trace/irq_vectors.h>
+#include <linux/interrupt.h>
 
 static inline void __smp_irq_work_interrupt(void)
 {
@@ -16,14 +17,14 @@ static inline void __smp_irq_work_interrupt(void)
 	irq_work_run();
 }
 
-__visible void smp_irq_work_interrupt(struct pt_regs *regs)
+__visible void __irq_entry smp_irq_work_interrupt(struct pt_regs *regs)
 {
 	ipi_entering_ack_irq();
 	__smp_irq_work_interrupt();
 	exiting_irq();
 }
 
-__visible void smp_trace_irq_work_interrupt(struct pt_regs *regs)
+__visible void __irq_entry smp_trace_irq_work_interrupt(struct pt_regs *regs)
 {
 	ipi_entering_ack_irq();
 	trace_irq_work_entry(IRQ_WORK_VECTOR);
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 4cfba947d774..eb69b14dbfc8 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -575,7 +575,9 @@ static void __init reserve_crashkernel(void)
 	/* 0 means: find the address automatically */
 	if (crash_base <= 0) {
 		/*
-		 *  kexec want bzImage is below CRASH_KERNEL_ADDR_MAX
+		 * Set CRASH_ADDR_LOW_MAX upper bound for crash memory,
+		 * as old kexec-tools loads bzImage below that, unless
+		 * "crashkernel=size[KMG],high" is specified.
 		 */
 		crash_base = memblock_find_in_range(CRASH_ALIGN,
 						    high ? CRASH_ADDR_HIGH_MAX
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
index 68f8cc222f25..d3c66a15bbde 100644
--- a/arch/x86/kernel/smp.c
+++ b/arch/x86/kernel/smp.c
@@ -259,7 +259,7 @@ static inline void __smp_reschedule_interrupt(void)
 	scheduler_ipi();
 }
 
-__visible void smp_reschedule_interrupt(struct pt_regs *regs)
+__visible void __irq_entry smp_reschedule_interrupt(struct pt_regs *regs)
 {
 	ack_APIC_irq();
 	__smp_reschedule_interrupt();
@@ -268,7 +268,7 @@ __visible void smp_reschedule_interrupt(struct pt_regs *regs)
 	 */
 }
 
-__visible void smp_trace_reschedule_interrupt(struct pt_regs *regs)
+__visible void __irq_entry smp_trace_reschedule_interrupt(struct pt_regs *regs)
 {
 	/*
 	 * Need to call irq_enter() before calling the trace point.
@@ -292,14 +292,15 @@ static inline void __smp_call_function_interrupt(void)
 	inc_irq_stat(irq_call_count);
 }
 
-__visible void smp_call_function_interrupt(struct pt_regs *regs)
+__visible void __irq_entry smp_call_function_interrupt(struct pt_regs *regs)
 {
 	ipi_entering_ack_irq();
 	__smp_call_function_interrupt();
 	exiting_irq();
 }
 
-__visible void smp_trace_call_function_interrupt(struct pt_regs *regs)
+__visible void __irq_entry
+smp_trace_call_function_interrupt(struct pt_regs *regs)
 {
 	ipi_entering_ack_irq();
 	trace_call_function_entry(CALL_FUNCTION_VECTOR);
@@ -314,14 +315,16 @@ static inline void __smp_call_function_single_interrupt(void)
 	inc_irq_stat(irq_call_count);
 }
 
-__visible void smp_call_function_single_interrupt(struct pt_regs *regs)
+__visible void __irq_entry
+smp_call_function_single_interrupt(struct pt_regs *regs)
 {
 	ipi_entering_ack_irq();
 	__smp_call_function_single_interrupt();
 	exiting_irq();
 }
 
-__visible void smp_trace_call_function_single_interrupt(struct pt_regs *regs)
+__visible void __irq_entry
+smp_trace_call_function_single_interrupt(struct pt_regs *regs)
 {
 	ipi_entering_ack_irq();
 	trace_call_function_single_entry(CALL_FUNCTION_SINGLE_VECTOR);

^ permalink raw reply	[flat|nested] 539+ messages in thread

* [GIT PULL] x86 fixes
@ 2017-02-11 18:18 Ingo Molnar
  0 siblings, 0 replies; 539+ messages in thread
From: Ingo Molnar @ 2017-02-11 18:18 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Thomas Gleixner, H. Peter Anvin, Peter Zijlstra,
	Andrew Morton

Linus,

Please pull the latest x86-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus

   # HEAD: 146fbb766934dc003fcbf755b519acef683576bf x86/mm/ptdump: Fix soft lockup in page table walker

Last minute x86 fixes:

 - Fix a softlockup detector warning and long delays if using ptdump with KASAN
   enabled.

 - Two more TSC-adjust fixes for interesting firmware interactions.

 - Two commits to fix an AMD CPU topology enumeration bug that caused a 
   measurable gaming performance regression.

 Thanks,

	Ingo

------------------>
Andrey Ryabinin (1):
      x86/mm/ptdump: Fix soft lockup in page table walker

Borislav Petkov (1):
      x86/CPU/AMD: Bring back Compute Unit ID

Thomas Gleixner (2):
      x86/tsc: Avoid the large time jump when sanitizing TSC ADJUST
      x86/tsc: Make the TSC ADJUST sanitizing work for tsc_reliable

Yazen Ghannam (1):
      x86/CPU/AMD: Fix Zen SMT topology


 arch/x86/include/asm/processor.h |  1 +
 arch/x86/kernel/cpu/amd.c        | 16 +++++++++++++++-
 arch/x86/kernel/cpu/common.c     |  1 +
 arch/x86/kernel/smpboot.c        | 12 +++++++++---
 arch/x86/kernel/tsc.c            |  5 +++--
 arch/x86/kernel/tsc_sync.c       | 16 +++++++---------
 arch/x86/mm/dump_pagetables.c    |  2 ++
 7 files changed, 38 insertions(+), 15 deletions(-)

diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 1be64da0384e..e6cfe7ba2d65 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -104,6 +104,7 @@ struct cpuinfo_x86 {
 	__u8			x86_phys_bits;
 	/* CPUID returned core id bits: */
 	__u8			x86_coreid_bits;
+	__u8			cu_id;
 	/* Max extended CPUID function supported: */
 	__u32			extended_cpuid_level;
 	/* Maximum supported CPUID level, -1=no CPUID: */
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 1d3167269a67..2b4cf04239b6 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -309,8 +309,22 @@ static void amd_get_topology(struct cpuinfo_x86 *c)
 
 	/* get information required for multi-node processors */
 	if (boot_cpu_has(X86_FEATURE_TOPOEXT)) {
+		u32 eax, ebx, ecx, edx;
 
-		node_id = cpuid_ecx(0x8000001e) & 7;
+		cpuid(0x8000001e, &eax, &ebx, &ecx, &edx);
+
+		node_id  = ecx & 0xff;
+		smp_num_siblings = ((ebx >> 8) & 0xff) + 1;
+
+		if (c->x86 == 0x15)
+			c->cu_id = ebx & 0xff;
+
+		if (c->x86 >= 0x17) {
+			c->cpu_core_id = ebx & 0xff;
+
+			if (smp_num_siblings > 1)
+				c->x86_max_cores /= smp_num_siblings;
+		}
 
 		/*
 		 * We may have multiple LLCs if L3 caches exist, so check if we
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 9bab7a8a4293..ede03e849a8b 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1015,6 +1015,7 @@ static void identify_cpu(struct cpuinfo_x86 *c)
 	c->x86_model_id[0] = '\0';  /* Unset */
 	c->x86_max_cores = 1;
 	c->x86_coreid_bits = 0;
+	c->cu_id = 0xff;
 #ifdef CONFIG_X86_64
 	c->x86_clflush_size = 64;
 	c->x86_phys_bits = 36;
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 46732dc3b73c..99b920d0e516 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -433,9 +433,15 @@ static bool match_smt(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
 		int cpu1 = c->cpu_index, cpu2 = o->cpu_index;
 
 		if (c->phys_proc_id == o->phys_proc_id &&
-		    per_cpu(cpu_llc_id, cpu1) == per_cpu(cpu_llc_id, cpu2) &&
-		    c->cpu_core_id == o->cpu_core_id)
-			return topology_sane(c, o, "smt");
+		    per_cpu(cpu_llc_id, cpu1) == per_cpu(cpu_llc_id, cpu2)) {
+			if (c->cpu_core_id == o->cpu_core_id)
+				return topology_sane(c, o, "smt");
+
+			if ((c->cu_id != 0xff) &&
+			    (o->cu_id != 0xff) &&
+			    (c->cu_id == o->cu_id))
+				return topology_sane(c, o, "smt");
+		}
 
 	} else if (c->phys_proc_id == o->phys_proc_id &&
 		   c->cpu_core_id == o->cpu_core_id) {
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index e41af597aed8..37e7cf544e51 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -1356,6 +1356,9 @@ void __init tsc_init(void)
 		(unsigned long)cpu_khz / 1000,
 		(unsigned long)cpu_khz % 1000);
 
+	/* Sanitize TSC ADJUST before cyc2ns gets initialized */
+	tsc_store_and_check_tsc_adjust(true);
+
 	/*
 	 * Secondary CPUs do not run through tsc_init(), so set up
 	 * all the scale factors for all CPUs, assuming the same
@@ -1386,8 +1389,6 @@ void __init tsc_init(void)
 
 	if (unsynchronized_tsc())
 		mark_tsc_unstable("TSCs unsynchronized");
-	else
-		tsc_store_and_check_tsc_adjust(true);
 
 	check_system_tsc_reliable();
 
diff --git a/arch/x86/kernel/tsc_sync.c b/arch/x86/kernel/tsc_sync.c
index d0db011051a5..728f75378475 100644
--- a/arch/x86/kernel/tsc_sync.c
+++ b/arch/x86/kernel/tsc_sync.c
@@ -286,13 +286,6 @@ void check_tsc_sync_source(int cpu)
 	if (unsynchronized_tsc())
 		return;
 
-	if (tsc_clocksource_reliable) {
-		if (cpu == (nr_cpu_ids-1) || system_state != SYSTEM_BOOTING)
-			pr_info(
-			"Skipped synchronization checks as TSC is reliable.\n");
-		return;
-	}
-
 	/*
 	 * Set the maximum number of test runs to
 	 *  1 if the CPU does not provide the TSC_ADJUST MSR
@@ -380,14 +373,19 @@ void check_tsc_sync_target(void)
 	int cpus = 2;
 
 	/* Also aborts if there is no TSC. */
-	if (unsynchronized_tsc() || tsc_clocksource_reliable)
+	if (unsynchronized_tsc())
 		return;
 
 	/*
 	 * Store, verify and sanitize the TSC adjust register. If
 	 * successful skip the test.
+	 *
+	 * The test is also skipped when the TSC is marked reliable. This
+	 * is true for SoCs which have no fallback clocksource. On these
+	 * SoCs the TSC is frequency synchronized, but still the TSC ADJUST
+	 * register might have been wreckaged by the BIOS..
 	 */
-	if (tsc_store_and_check_tsc_adjust(false)) {
+	if (tsc_store_and_check_tsc_adjust(false) || tsc_clocksource_reliable) {
 		atomic_inc(&skip_test);
 		return;
 	}
diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c
index ea9c49adaa1f..8aa6bea1cd6c 100644
--- a/arch/x86/mm/dump_pagetables.c
+++ b/arch/x86/mm/dump_pagetables.c
@@ -15,6 +15,7 @@
 #include <linux/debugfs.h>
 #include <linux/mm.h>
 #include <linux/init.h>
+#include <linux/sched.h>
 #include <linux/seq_file.h>
 
 #include <asm/pgtable.h>
@@ -406,6 +407,7 @@ static void ptdump_walk_pgd_level_core(struct seq_file *m, pgd_t *pgd,
 		} else
 			note_page(m, &st, __pgprot(0), 1);
 
+		cond_resched();
 		start++;
 	}
 

^ permalink raw reply	[flat|nested] 539+ messages in thread

* [GIT PULL] x86 fixes
@ 2017-02-02 21:04 Ingo Molnar
  0 siblings, 0 replies; 539+ messages in thread
From: Ingo Molnar @ 2017-02-02 21:04 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Thomas Gleixner, H. Peter Anvin, Peter Zijlstra,
	Borislav Petkov, Andrew Morton

Linus,

Please pull the latest x86-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus

   # HEAD: 0becc0ae5b42828785b589f686725ff5bc3b9b25 x86/mce: Make timer handling more robust

Misc fixes:

 - two microcode loader fixes

 - two FPU xstate handling fixes

 - an MCE timer handling related crash fix

 Thanks,

	Ingo

------------------>
Borislav Petkov (2):
      x86/microcode/intel: Drop stashed AP patch pointer optimization
      x86/microcode: Do not access the initrd after it has been freed

Kevin Hao (1):
      x86/fpu: Set the xcomp_bv when we fake up a XSAVES area

Thomas Gleixner (1):
      x86/mce: Make timer handling more robust

Yu-cheng Yu (1):
      x86/fpu/xstate: Fix xcomp_bv in XSAVES header


 arch/x86/include/asm/microcode.h      |  1 +
 arch/x86/kernel/cpu/mcheck/mce.c      | 31 ++++++++++++-------------------
 arch/x86/kernel/cpu/microcode/amd.c   |  5 +++--
 arch/x86/kernel/cpu/microcode/core.c  | 22 +++++++++++++++++-----
 arch/x86/kernel/cpu/microcode/intel.c |  9 +--------
 arch/x86/kernel/fpu/core.c            |  4 +++-
 6 files changed, 37 insertions(+), 35 deletions(-)

diff --git a/arch/x86/include/asm/microcode.h b/arch/x86/include/asm/microcode.h
index 38711df3bcb5..2266f864b747 100644
--- a/arch/x86/include/asm/microcode.h
+++ b/arch/x86/include/asm/microcode.h
@@ -140,6 +140,7 @@ extern void __init load_ucode_bsp(void);
 extern void load_ucode_ap(void);
 void reload_early_microcode(void);
 extern bool get_builtin_firmware(struct cpio_data *cd, const char *name);
+extern bool initrd_gone;
 #else
 static inline int __init microcode_init(void)			{ return 0; };
 static inline void __init load_ucode_bsp(void)			{ }
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 00ef43233e03..537c6647d84c 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -1373,20 +1373,15 @@ static unsigned long mce_adjust_timer_default(unsigned long interval)
 
 static unsigned long (*mce_adjust_timer)(unsigned long interval) = mce_adjust_timer_default;
 
-static void __restart_timer(struct timer_list *t, unsigned long interval)
+static void __start_timer(struct timer_list *t, unsigned long interval)
 {
 	unsigned long when = jiffies + interval;
 	unsigned long flags;
 
 	local_irq_save(flags);
 
-	if (timer_pending(t)) {
-		if (time_before(when, t->expires))
-			mod_timer(t, when);
-	} else {
-		t->expires = round_jiffies(when);
-		add_timer_on(t, smp_processor_id());
-	}
+	if (!timer_pending(t) || time_before(when, t->expires))
+		mod_timer(t, round_jiffies(when));
 
 	local_irq_restore(flags);
 }
@@ -1421,7 +1416,7 @@ static void mce_timer_fn(unsigned long data)
 
 done:
 	__this_cpu_write(mce_next_interval, iv);
-	__restart_timer(t, iv);
+	__start_timer(t, iv);
 }
 
 /*
@@ -1432,7 +1427,7 @@ void mce_timer_kick(unsigned long interval)
 	struct timer_list *t = this_cpu_ptr(&mce_timer);
 	unsigned long iv = __this_cpu_read(mce_next_interval);
 
-	__restart_timer(t, interval);
+	__start_timer(t, interval);
 
 	if (interval < iv)
 		__this_cpu_write(mce_next_interval, interval);
@@ -1779,17 +1774,15 @@ static void __mcheck_cpu_clear_vendor(struct cpuinfo_x86 *c)
 	}
 }
 
-static void mce_start_timer(unsigned int cpu, struct timer_list *t)
+static void mce_start_timer(struct timer_list *t)
 {
 	unsigned long iv = check_interval * HZ;
 
 	if (mca_cfg.ignore_ce || !iv)
 		return;
 
-	per_cpu(mce_next_interval, cpu) = iv;
-
-	t->expires = round_jiffies(jiffies + iv);
-	add_timer_on(t, cpu);
+	this_cpu_write(mce_next_interval, iv);
+	__start_timer(t, iv);
 }
 
 static void __mcheck_cpu_setup_timer(void)
@@ -1806,7 +1799,7 @@ static void __mcheck_cpu_init_timer(void)
 	unsigned int cpu = smp_processor_id();
 
 	setup_pinned_timer(t, mce_timer_fn, cpu);
-	mce_start_timer(cpu, t);
+	mce_start_timer(t);
 }
 
 /* Handle unconfigured int18 (should never happen) */
@@ -2566,7 +2559,7 @@ static int mce_cpu_dead(unsigned int cpu)
 
 static int mce_cpu_online(unsigned int cpu)
 {
-	struct timer_list *t = &per_cpu(mce_timer, cpu);
+	struct timer_list *t = this_cpu_ptr(&mce_timer);
 	int ret;
 
 	mce_device_create(cpu);
@@ -2577,13 +2570,13 @@ static int mce_cpu_online(unsigned int cpu)
 		return ret;
 	}
 	mce_reenable_cpu();
-	mce_start_timer(cpu, t);
+	mce_start_timer(t);
 	return 0;
 }
 
 static int mce_cpu_pre_down(unsigned int cpu)
 {
-	struct timer_list *t = &per_cpu(mce_timer, cpu);
+	struct timer_list *t = this_cpu_ptr(&mce_timer);
 
 	mce_disable_cpu();
 	del_timer_sync(t);
diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c
index 6a31e2691f3a..079e81733a58 100644
--- a/arch/x86/kernel/cpu/microcode/amd.c
+++ b/arch/x86/kernel/cpu/microcode/amd.c
@@ -384,8 +384,9 @@ void load_ucode_amd_ap(unsigned int family)
 reget:
 		if (!get_builtin_microcode(&cp, family)) {
 #ifdef CONFIG_BLK_DEV_INITRD
-			cp = find_cpio_data(ucode_path, (void *)initrd_start,
-					    initrd_end - initrd_start, NULL);
+			if (!initrd_gone)
+				cp = find_cpio_data(ucode_path, (void *)initrd_start,
+						    initrd_end - initrd_start, NULL);
 #endif
 			if (!(cp.data && cp.size)) {
 				/*
diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c
index 2af69d27da62..73102d932760 100644
--- a/arch/x86/kernel/cpu/microcode/core.c
+++ b/arch/x86/kernel/cpu/microcode/core.c
@@ -46,6 +46,8 @@
 static struct microcode_ops	*microcode_ops;
 static bool dis_ucode_ldr = true;
 
+bool initrd_gone;
+
 LIST_HEAD(microcode_cache);
 
 /*
@@ -190,21 +192,24 @@ void load_ucode_ap(void)
 static int __init save_microcode_in_initrd(void)
 {
 	struct cpuinfo_x86 *c = &boot_cpu_data;
+	int ret = -EINVAL;
 
 	switch (c->x86_vendor) {
 	case X86_VENDOR_INTEL:
 		if (c->x86 >= 6)
-			return save_microcode_in_initrd_intel();
+			ret = save_microcode_in_initrd_intel();
 		break;
 	case X86_VENDOR_AMD:
 		if (c->x86 >= 0x10)
-			return save_microcode_in_initrd_amd(c->x86);
+			ret = save_microcode_in_initrd_amd(c->x86);
 		break;
 	default:
 		break;
 	}
 
-	return -EINVAL;
+	initrd_gone = true;
+
+	return ret;
 }
 
 struct cpio_data find_microcode_in_initrd(const char *path, bool use_pa)
@@ -247,9 +252,16 @@ struct cpio_data find_microcode_in_initrd(const char *path, bool use_pa)
 	 * has the virtual address of the beginning of the initrd. It also
 	 * possibly relocates the ramdisk. In either case, initrd_start contains
 	 * the updated address so use that instead.
+	 *
+	 * initrd_gone is for the hotplug case where we've thrown out initrd
+	 * already.
 	 */
-	if (!use_pa && initrd_start)
-		start = initrd_start;
+	if (!use_pa) {
+		if (initrd_gone)
+			return (struct cpio_data){ NULL, 0, "" };
+		if (initrd_start)
+			start = initrd_start;
+	}
 
 	return find_cpio_data(path, (void *)start, size, NULL);
 #else /* !CONFIG_BLK_DEV_INITRD */
diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c
index 3f329b74e040..8325d8a09ab0 100644
--- a/arch/x86/kernel/cpu/microcode/intel.c
+++ b/arch/x86/kernel/cpu/microcode/intel.c
@@ -41,7 +41,7 @@
 
 static const char ucode_path[] = "kernel/x86/microcode/GenuineIntel.bin";
 
-/* Current microcode patch used in early patching */
+/* Current microcode patch used in early patching on the APs. */
 struct microcode_intel *intel_ucode_patch;
 
 static inline bool cpu_signatures_match(unsigned int s1, unsigned int p1,
@@ -607,12 +607,6 @@ int __init save_microcode_in_initrd_intel(void)
 	struct ucode_cpu_info uci;
 	struct cpio_data cp;
 
-	/*
-	 * AP loading didn't find any microcode patch, no need to save anything.
-	 */
-	if (!intel_ucode_patch || IS_ERR(intel_ucode_patch))
-		return 0;
-
 	if (!load_builtin_intel_microcode(&cp))
 		cp = find_microcode_in_initrd(ucode_path, false);
 
@@ -628,7 +622,6 @@ int __init save_microcode_in_initrd_intel(void)
 	return 0;
 }
 
-
 /*
  * @res_patch, output: a pointer to the patch we found.
  */
diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index e4e97a5355ce..de7234401275 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -9,6 +9,7 @@
 #include <asm/fpu/regset.h>
 #include <asm/fpu/signal.h>
 #include <asm/fpu/types.h>
+#include <asm/fpu/xstate.h>
 #include <asm/traps.h>
 
 #include <linux/hardirq.h>
@@ -183,7 +184,8 @@ void fpstate_init(union fpregs_state *state)
 	 * it will #GP. Make sure it is replaced after the memset().
 	 */
 	if (static_cpu_has(X86_FEATURE_XSAVES))
-		state->xsave.header.xcomp_bv = XCOMP_BV_COMPACTED_FORMAT;
+		state->xsave.header.xcomp_bv = XCOMP_BV_COMPACTED_FORMAT |
+					       xfeatures_mask;
 
 	if (static_cpu_has(X86_FEATURE_FXSR))
 		fpstate_init_fxstate(&state->fxsave);

^ permalink raw reply	[flat|nested] 539+ messages in thread

* [GIT PULL] x86 fixes
@ 2017-01-15 10:06 Ingo Molnar
  0 siblings, 0 replies; 539+ messages in thread
From: Ingo Molnar @ 2017-01-15 10:06 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Thomas Gleixner, H. Peter Anvin, Andrew Morton,
	Peter Zijlstra

Linus,

Please pull the latest x86-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus

   # HEAD: 453828625731d0ba7218242ef6ec88f59408f368 x86/mpx: Use compatible types in comparison to fix sparse error

Misc fixes:

 - unwinder fixes
 - AMD CPU topology enumeration fixes
 - microcode loader fixes
 - x86 embedded platform fixes
 - fix for a bootup crash that may trigger when clearcpuid= is used with invalid values

 Thanks,

	Ingo

------------------>
Andy Shevchenko (2):
      x86/cpu: Fix typo in the comment for Anniedale
      x86/platform/intel-mid: Rename 'spidev' to 'mrfld_spidev'

Borislav Petkov (4):
      x86/CPU/AMD: Fix Bulldozer topology
      x86/CPU: Add native CPUID variants returning a single datum
      x86/microcode: Use native CPUID to tickle out microcode revision
      x86/microcode/intel: Add a helper which gives the microcode revision

Josh Poimboeuf (4):
      x86/unwind: Silence warnings for non-current tasks
      x86/unwind: Disable KASAN checks for non-current tasks
      x86/unwind: Include __schedule() in stack traces
      x86/entry: Fix the end of the stack for newly forked tasks

Junichi Nomura (2):
      x86/microcode/intel: Fix allocation size of struct ucode_patch
      x86/microcode/intel: Use correct buffer size for saving microcode data

Len Brown (1):
      x86/tsc: Add the Intel Denverton Processor to native_calibrate_tsc()

Lukasz Odzioba (1):
      x86/cpu: Fix bootup crashes by sanitizing the argument of the 'clearcpuid=' command-line option

Nicholas Mc Guire (1):
      x86/boot: Add missing declaration of string functions

Tobias Klauser (1):
      x86/mpx: Use compatible types in comparison to fix sparse error


 arch/x86/boot/string.c                             |  1 +
 arch/x86/boot/string.h                             |  9 +++
 arch/x86/entry/entry_32.S                          | 30 ++++------
 arch/x86/entry/entry_64.S                          | 11 ++--
 arch/x86/include/asm/intel-family.h                |  2 +-
 arch/x86/include/asm/microcode_intel.h             | 15 +++++
 arch/x86/include/asm/processor.h                   | 18 ++++++
 arch/x86/include/asm/stacktrace.h                  |  2 +-
 arch/x86/include/asm/switch_to.h                   | 10 +++-
 arch/x86/kernel/cpu/amd.c                          |  9 +--
 arch/x86/kernel/cpu/common.c                       |  2 +-
 arch/x86/kernel/cpu/intel.c                        | 11 +---
 arch/x86/kernel/cpu/microcode/intel.c              | 70 ++++++----------------
 arch/x86/kernel/tsc.c                              |  1 +
 arch/x86/kernel/unwind_frame.c                     | 30 +++++++++-
 arch/x86/mm/mpx.c                                  |  2 +-
 arch/x86/platform/intel-mid/device_libs/Makefile   |  2 +-
 .../{platform_spidev.c => platform_mrfld_spidev.c} |  4 ++
 18 files changed, 129 insertions(+), 100 deletions(-)
 rename arch/x86/platform/intel-mid/device_libs/{platform_spidev.c => platform_mrfld_spidev.c} (91%)

diff --git a/arch/x86/boot/string.c b/arch/x86/boot/string.c
index cc3bd583dce1..9e240fcba784 100644
--- a/arch/x86/boot/string.c
+++ b/arch/x86/boot/string.c
@@ -14,6 +14,7 @@
 
 #include <linux/types.h>
 #include "ctype.h"
+#include "string.h"
 
 int memcmp(const void *s1, const void *s2, size_t len)
 {
diff --git a/arch/x86/boot/string.h b/arch/x86/boot/string.h
index 725e820602b1..113588ddb43f 100644
--- a/arch/x86/boot/string.h
+++ b/arch/x86/boot/string.h
@@ -18,4 +18,13 @@ int memcmp(const void *s1, const void *s2, size_t len);
 #define memset(d,c,l) __builtin_memset(d,c,l)
 #define memcmp	__builtin_memcmp
 
+extern int strcmp(const char *str1, const char *str2);
+extern int strncmp(const char *cs, const char *ct, size_t count);
+extern size_t strlen(const char *s);
+extern char *strstr(const char *s1, const char *s2);
+extern size_t strnlen(const char *s, size_t maxlen);
+extern unsigned int atou(const char *s);
+extern unsigned long long simple_strtoull(const char *cp, char **endp,
+					  unsigned int base);
+
 #endif /* BOOT_STRING_H */
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index 701d29f8e4d3..57f7ec35216e 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -255,23 +255,6 @@ ENTRY(__switch_to_asm)
 END(__switch_to_asm)
 
 /*
- * The unwinder expects the last frame on the stack to always be at the same
- * offset from the end of the page, which allows it to validate the stack.
- * Calling schedule_tail() directly would break that convention because its an
- * asmlinkage function so its argument has to be pushed on the stack.  This
- * wrapper creates a proper "end of stack" frame header before the call.
- */
-ENTRY(schedule_tail_wrapper)
-	FRAME_BEGIN
-
-	pushl	%eax
-	call	schedule_tail
-	popl	%eax
-
-	FRAME_END
-	ret
-ENDPROC(schedule_tail_wrapper)
-/*
  * A newly forked process directly context switches into this address.
  *
  * eax: prev task we switched from
@@ -279,15 +262,24 @@ ENDPROC(schedule_tail_wrapper)
  * edi: kernel thread arg
  */
 ENTRY(ret_from_fork)
-	call	schedule_tail_wrapper
+	FRAME_BEGIN		/* help unwinder find end of stack */
+
+	/*
+	 * schedule_tail() is asmlinkage so we have to put its 'prev' argument
+	 * on the stack.
+	 */
+	pushl	%eax
+	call	schedule_tail
+	popl	%eax
 
 	testl	%ebx, %ebx
 	jnz	1f		/* kernel threads are uncommon */
 
 2:
 	/* When we fork, we trace the syscall return in the child, too. */
-	movl    %esp, %eax
+	leal	FRAME_OFFSET(%esp), %eax
 	call    syscall_return_slowpath
+	FRAME_END
 	jmp     restore_all
 
 	/* kernel thread */
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 5b219707c2f2..044d18ebc43c 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -36,6 +36,7 @@
 #include <asm/smap.h>
 #include <asm/pgtable_types.h>
 #include <asm/export.h>
+#include <asm/frame.h>
 #include <linux/err.h>
 
 .code64
@@ -408,17 +409,19 @@ END(__switch_to_asm)
  * r12: kernel thread arg
  */
 ENTRY(ret_from_fork)
+	FRAME_BEGIN			/* help unwinder find end of stack */
 	movq	%rax, %rdi
-	call	schedule_tail			/* rdi: 'prev' task parameter */
+	call	schedule_tail		/* rdi: 'prev' task parameter */
 
-	testq	%rbx, %rbx			/* from kernel_thread? */
-	jnz	1f				/* kernel threads are uncommon */
+	testq	%rbx, %rbx		/* from kernel_thread? */
+	jnz	1f			/* kernel threads are uncommon */
 
 2:
-	movq	%rsp, %rdi
+	leaq	FRAME_OFFSET(%rsp),%rdi	/* pt_regs pointer */
 	call	syscall_return_slowpath	/* returns with IRQs disabled */
 	TRACE_IRQS_ON			/* user mode is traced as IRQS on */
 	SWAPGS
+	FRAME_END
 	jmp	restore_regs_and_iret
 
 1:
diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h
index 34a46dc076d3..8167fdb67ae8 100644
--- a/arch/x86/include/asm/intel-family.h
+++ b/arch/x86/include/asm/intel-family.h
@@ -57,7 +57,7 @@
 #define INTEL_FAM6_ATOM_SILVERMONT2	0x4D /* Avaton/Rangely */
 #define INTEL_FAM6_ATOM_AIRMONT		0x4C /* CherryTrail / Braswell */
 #define INTEL_FAM6_ATOM_MERRIFIELD	0x4A /* Tangier */
-#define INTEL_FAM6_ATOM_MOOREFIELD	0x5A /* Annidale */
+#define INTEL_FAM6_ATOM_MOOREFIELD	0x5A /* Anniedale */
 #define INTEL_FAM6_ATOM_GOLDMONT	0x5C
 #define INTEL_FAM6_ATOM_DENVERTON	0x5F /* Goldmont Microserver */
 
diff --git a/arch/x86/include/asm/microcode_intel.h b/arch/x86/include/asm/microcode_intel.h
index 195becc6f780..e793fc9a9b20 100644
--- a/arch/x86/include/asm/microcode_intel.h
+++ b/arch/x86/include/asm/microcode_intel.h
@@ -52,6 +52,21 @@ struct extended_sigtable {
 
 #define exttable_size(et) ((et)->count * EXT_SIGNATURE_SIZE + EXT_HEADER_SIZE)
 
+static inline u32 intel_get_microcode_revision(void)
+{
+	u32 rev, dummy;
+
+	native_wrmsrl(MSR_IA32_UCODE_REV, 0);
+
+	/* As documented in the SDM: Do a CPUID 1 here */
+	native_cpuid_eax(1);
+
+	/* get the current revision from MSR 0x8B */
+	native_rdmsr(MSR_IA32_UCODE_REV, dummy, rev);
+
+	return rev;
+}
+
 #ifdef CONFIG_MICROCODE_INTEL
 extern void __init load_ucode_intel_bsp(void);
 extern void load_ucode_intel_ap(void);
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index eaf100508c36..1be64da0384e 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -219,6 +219,24 @@ static inline void native_cpuid(unsigned int *eax, unsigned int *ebx,
 	    : "memory");
 }
 
+#define native_cpuid_reg(reg)					\
+static inline unsigned int native_cpuid_##reg(unsigned int op)	\
+{								\
+	unsigned int eax = op, ebx, ecx = 0, edx;		\
+								\
+	native_cpuid(&eax, &ebx, &ecx, &edx);			\
+								\
+	return reg;						\
+}
+
+/*
+ * Native CPUID functions returning a single datum.
+ */
+native_cpuid_reg(eax)
+native_cpuid_reg(ebx)
+native_cpuid_reg(ecx)
+native_cpuid_reg(edx)
+
 static inline void load_cr3(pgd_t *pgdir)
 {
 	write_cr3(__pa(pgdir));
diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h
index a3269c897ec5..2e41c50ddf47 100644
--- a/arch/x86/include/asm/stacktrace.h
+++ b/arch/x86/include/asm/stacktrace.h
@@ -58,7 +58,7 @@ get_frame_pointer(struct task_struct *task, struct pt_regs *regs)
 	if (task == current)
 		return __builtin_frame_address(0);
 
-	return (unsigned long *)((struct inactive_task_frame *)task->thread.sp)->bp;
+	return &((struct inactive_task_frame *)task->thread.sp)->bp;
 }
 #else
 static inline unsigned long *
diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h
index 5cb436acd463..fcc5cd387fd1 100644
--- a/arch/x86/include/asm/switch_to.h
+++ b/arch/x86/include/asm/switch_to.h
@@ -36,7 +36,10 @@ static inline void prepare_switch_to(struct task_struct *prev,
 
 asmlinkage void ret_from_fork(void);
 
-/* data that is pointed to by thread.sp */
+/*
+ * This is the structure pointed to by thread.sp for an inactive task.  The
+ * order of the fields must match the code in __switch_to_asm().
+ */
 struct inactive_task_frame {
 #ifdef CONFIG_X86_64
 	unsigned long r15;
@@ -48,6 +51,11 @@ struct inactive_task_frame {
 	unsigned long di;
 #endif
 	unsigned long bx;
+
+	/*
+	 * These two fields must be together.  They form a stack frame header,
+	 * needed by get_frame_pointer().
+	 */
 	unsigned long bp;
 	unsigned long ret_addr;
 };
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 71cae73a5076..1d3167269a67 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -309,15 +309,8 @@ static void amd_get_topology(struct cpuinfo_x86 *c)
 
 	/* get information required for multi-node processors */
 	if (boot_cpu_has(X86_FEATURE_TOPOEXT)) {
-		u32 eax, ebx, ecx, edx;
 
-		cpuid(0x8000001e, &eax, &ebx, &ecx, &edx);
-		node_id = ecx & 7;
-
-		/* get compute unit information */
-		smp_num_siblings = ((ebx >> 8) & 3) + 1;
-		c->x86_max_cores /= smp_num_siblings;
-		c->cpu_core_id = ebx & 0xff;
+		node_id = cpuid_ecx(0x8000001e) & 7;
 
 		/*
 		 * We may have multiple LLCs if L3 caches exist, so check if we
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index dc1697ca5191..9bab7a8a4293 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1221,7 +1221,7 @@ static __init int setup_disablecpuid(char *arg)
 {
 	int bit;
 
-	if (get_option(&arg, &bit) && bit < NCAPINTS*32)
+	if (get_option(&arg, &bit) && bit >= 0 && bit < NCAPINTS * 32)
 		setup_clear_cpu_cap(bit);
 	else
 		return 0;
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index fcd484d2bb03..203f860d2ab3 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -14,6 +14,7 @@
 #include <asm/bugs.h>
 #include <asm/cpu.h>
 #include <asm/intel-family.h>
+#include <asm/microcode_intel.h>
 
 #ifdef CONFIG_X86_64
 #include <linux/topology.h>
@@ -78,14 +79,8 @@ static void early_init_intel(struct cpuinfo_x86 *c)
 		(c->x86 == 0x6 && c->x86_model >= 0x0e))
 		set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
 
-	if (c->x86 >= 6 && !cpu_has(c, X86_FEATURE_IA64)) {
-		unsigned lower_word;
-
-		wrmsr(MSR_IA32_UCODE_REV, 0, 0);
-		/* Required by the SDM */
-		sync_core();
-		rdmsr(MSR_IA32_UCODE_REV, lower_word, c->microcode);
-	}
+	if (c->x86 >= 6 && !cpu_has(c, X86_FEATURE_IA64))
+		c->microcode = intel_get_microcode_revision();
 
 	/*
 	 * Atom erratum AAE44/AAF40/AAG38/AAH41:
diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c
index b624b54912e1..3f329b74e040 100644
--- a/arch/x86/kernel/cpu/microcode/intel.c
+++ b/arch/x86/kernel/cpu/microcode/intel.c
@@ -150,7 +150,7 @@ static struct ucode_patch *__alloc_microcode_buf(void *data, unsigned int size)
 {
 	struct ucode_patch *p;
 
-	p = kzalloc(size, GFP_KERNEL);
+	p = kzalloc(sizeof(struct ucode_patch), GFP_KERNEL);
 	if (!p)
 		return ERR_PTR(-ENOMEM);
 
@@ -368,26 +368,6 @@ scan_microcode(void *data, size_t size, struct ucode_cpu_info *uci, bool save)
 	return patch;
 }
 
-static void cpuid_1(void)
-{
-	/*
-	 * According to the Intel SDM, Volume 3, 9.11.7:
-	 *
-	 *   CPUID returns a value in a model specific register in
-	 *   addition to its usual register return values. The
-	 *   semantics of CPUID cause it to deposit an update ID value
-	 *   in the 64-bit model-specific register at address 08BH
-	 *   (IA32_BIOS_SIGN_ID). If no update is present in the
-	 *   processor, the value in the MSR remains unmodified.
-	 *
-	 * Use native_cpuid -- this code runs very early and we don't
-	 * want to mess with paravirt.
-	 */
-	unsigned int eax = 1, ebx, ecx = 0, edx;
-
-	native_cpuid(&eax, &ebx, &ecx, &edx);
-}
-
 static int collect_cpu_info_early(struct ucode_cpu_info *uci)
 {
 	unsigned int val[2];
@@ -410,15 +390,8 @@ static int collect_cpu_info_early(struct ucode_cpu_info *uci)
 		native_rdmsr(MSR_IA32_PLATFORM_ID, val[0], val[1]);
 		csig.pf = 1 << ((val[1] >> 18) & 7);
 	}
-	native_wrmsrl(MSR_IA32_UCODE_REV, 0);
-
-	/* As documented in the SDM: Do a CPUID 1 here */
-	cpuid_1();
 
-	/* get the current revision from MSR 0x8B */
-	native_rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]);
-
-	csig.rev = val[1];
+	csig.rev = intel_get_microcode_revision();
 
 	uci->cpu_sig = csig;
 	uci->valid = 1;
@@ -602,7 +575,7 @@ static inline void print_ucode(struct ucode_cpu_info *uci)
 static int apply_microcode_early(struct ucode_cpu_info *uci, bool early)
 {
 	struct microcode_intel *mc;
-	unsigned int val[2];
+	u32 rev;
 
 	mc = uci->mc;
 	if (!mc)
@@ -610,21 +583,16 @@ static int apply_microcode_early(struct ucode_cpu_info *uci, bool early)
 
 	/* write microcode via MSR 0x79 */
 	native_wrmsrl(MSR_IA32_UCODE_WRITE, (unsigned long)mc->bits);
-	native_wrmsrl(MSR_IA32_UCODE_REV, 0);
 
-	/* As documented in the SDM: Do a CPUID 1 here */
-	cpuid_1();
-
-	/* get the current revision from MSR 0x8B */
-	native_rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]);
-	if (val[1] != mc->hdr.rev)
+	rev = intel_get_microcode_revision();
+	if (rev != mc->hdr.rev)
 		return -1;
 
 #ifdef CONFIG_X86_64
 	/* Flush global tlb. This is precaution. */
 	flush_tlb_early();
 #endif
-	uci->cpu_sig.rev = val[1];
+	uci->cpu_sig.rev = rev;
 
 	if (early)
 		print_ucode(uci);
@@ -804,8 +772,8 @@ static int apply_microcode_intel(int cpu)
 	struct microcode_intel *mc;
 	struct ucode_cpu_info *uci;
 	struct cpuinfo_x86 *c;
-	unsigned int val[2];
 	static int prev_rev;
+	u32 rev;
 
 	/* We should bind the task to the CPU */
 	if (WARN_ON(raw_smp_processor_id() != cpu))
@@ -822,33 +790,28 @@ static int apply_microcode_intel(int cpu)
 
 	/* write microcode via MSR 0x79 */
 	wrmsrl(MSR_IA32_UCODE_WRITE, (unsigned long)mc->bits);
-	wrmsrl(MSR_IA32_UCODE_REV, 0);
-
-	/* As documented in the SDM: Do a CPUID 1 here */
-	cpuid_1();
 
-	/* get the current revision from MSR 0x8B */
-	rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]);
+	rev = intel_get_microcode_revision();
 
-	if (val[1] != mc->hdr.rev) {
+	if (rev != mc->hdr.rev) {
 		pr_err("CPU%d update to revision 0x%x failed\n",
 		       cpu, mc->hdr.rev);
 		return -1;
 	}
 
-	if (val[1] != prev_rev) {
+	if (rev != prev_rev) {
 		pr_info("updated to revision 0x%x, date = %04x-%02x-%02x\n",
-			val[1],
+			rev,
 			mc->hdr.date & 0xffff,
 			mc->hdr.date >> 24,
 			(mc->hdr.date >> 16) & 0xff);
-		prev_rev = val[1];
+		prev_rev = rev;
 	}
 
 	c = &cpu_data(cpu);
 
-	uci->cpu_sig.rev = val[1];
-	c->microcode = val[1];
+	uci->cpu_sig.rev = rev;
+	c->microcode = rev;
 
 	return 0;
 }
@@ -860,7 +823,7 @@ static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size,
 	u8 *ucode_ptr = data, *new_mc = NULL, *mc = NULL;
 	int new_rev = uci->cpu_sig.rev;
 	unsigned int leftover = size;
-	unsigned int curr_mc_size = 0;
+	unsigned int curr_mc_size = 0, new_mc_size = 0;
 	unsigned int csig, cpf;
 
 	while (leftover) {
@@ -901,6 +864,7 @@ static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size,
 			vfree(new_mc);
 			new_rev = mc_header.rev;
 			new_mc  = mc;
+			new_mc_size = mc_size;
 			mc = NULL;	/* trigger new vmalloc */
 		}
 
@@ -926,7 +890,7 @@ static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size,
 	 * permanent memory. So it will be loaded early when a CPU is hot added
 	 * or resumes.
 	 */
-	save_mc_for_early(new_mc, curr_mc_size);
+	save_mc_for_early(new_mc, new_mc_size);
 
 	pr_debug("CPU%d found a matching microcode update with version 0x%x (current=0x%x)\n",
 		 cpu, new_rev, uci->cpu_sig.rev);
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index be3a49ee0356..e41af597aed8 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -694,6 +694,7 @@ unsigned long native_calibrate_tsc(void)
 			crystal_khz = 24000;	/* 24.0 MHz */
 			break;
 		case INTEL_FAM6_SKYLAKE_X:
+		case INTEL_FAM6_ATOM_DENVERTON:
 			crystal_khz = 25000;	/* 25.0 MHz */
 			break;
 		case INTEL_FAM6_ATOM_GOLDMONT:
diff --git a/arch/x86/kernel/unwind_frame.c b/arch/x86/kernel/unwind_frame.c
index 4443e499f279..23d15565d02a 100644
--- a/arch/x86/kernel/unwind_frame.c
+++ b/arch/x86/kernel/unwind_frame.c
@@ -6,6 +6,21 @@
 
 #define FRAME_HEADER_SIZE (sizeof(long) * 2)
 
+/*
+ * This disables KASAN checking when reading a value from another task's stack,
+ * since the other task could be running on another CPU and could have poisoned
+ * the stack in the meantime.
+ */
+#define READ_ONCE_TASK_STACK(task, x)			\
+({							\
+	unsigned long val;				\
+	if (task == current)				\
+		val = READ_ONCE(x);			\
+	else						\
+		val = READ_ONCE_NOCHECK(x);		\
+	val;						\
+})
+
 static void unwind_dump(struct unwind_state *state, unsigned long *sp)
 {
 	static bool dumped_before = false;
@@ -48,7 +63,8 @@ unsigned long unwind_get_return_address(struct unwind_state *state)
 	if (state->regs && user_mode(state->regs))
 		return 0;
 
-	addr = ftrace_graph_ret_addr(state->task, &state->graph_idx, *addr_p,
+	addr = READ_ONCE_TASK_STACK(state->task, *addr_p);
+	addr = ftrace_graph_ret_addr(state->task, &state->graph_idx, addr,
 				     addr_p);
 
 	return __kernel_text_address(addr) ? addr : 0;
@@ -162,7 +178,7 @@ bool unwind_next_frame(struct unwind_state *state)
 	if (state->regs)
 		next_bp = (unsigned long *)state->regs->bp;
 	else
-		next_bp = (unsigned long *)*state->bp;
+		next_bp = (unsigned long *)READ_ONCE_TASK_STACK(state->task,*state->bp);
 
 	/* is the next frame pointer an encoded pointer to pt_regs? */
 	regs = decode_frame_pointer(next_bp);
@@ -207,6 +223,16 @@ bool unwind_next_frame(struct unwind_state *state)
 	return true;
 
 bad_address:
+	/*
+	 * When unwinding a non-current task, the task might actually be
+	 * running on another CPU, in which case it could be modifying its
+	 * stack while we're reading it.  This is generally not a problem and
+	 * can be ignored as long as the caller understands that unwinding
+	 * another task will not always succeed.
+	 */
+	if (state->task != current)
+		goto the_end;
+
 	if (state->regs) {
 		printk_deferred_once(KERN_WARNING
 			"WARNING: kernel stack regs at %p in %s:%d has bad 'bp' value %p\n",
diff --git a/arch/x86/mm/mpx.c b/arch/x86/mm/mpx.c
index 324e5713d386..af59f808742f 100644
--- a/arch/x86/mm/mpx.c
+++ b/arch/x86/mm/mpx.c
@@ -293,7 +293,7 @@ siginfo_t *mpx_generate_siginfo(struct pt_regs *regs)
 	 * We were not able to extract an address from the instruction,
 	 * probably because there was something invalid in it.
 	 */
-	if (info->si_addr == (void *)-1) {
+	if (info->si_addr == (void __user *)-1) {
 		err = -EINVAL;
 		goto err_out;
 	}
diff --git a/arch/x86/platform/intel-mid/device_libs/Makefile b/arch/x86/platform/intel-mid/device_libs/Makefile
index 61b5ed2b7d40..90e4f2a6625b 100644
--- a/arch/x86/platform/intel-mid/device_libs/Makefile
+++ b/arch/x86/platform/intel-mid/device_libs/Makefile
@@ -15,7 +15,7 @@ obj-$(subst m,y,$(CONFIG_INTEL_MID_POWER_BUTTON)) += platform_msic_power_btn.o
 obj-$(subst m,y,$(CONFIG_GPIO_INTEL_PMIC)) += platform_pmic_gpio.o
 obj-$(subst m,y,$(CONFIG_INTEL_MFLD_THERMAL)) += platform_msic_thermal.o
 # SPI Devices
-obj-$(subst m,y,$(CONFIG_SPI_SPIDEV)) += platform_spidev.o
+obj-$(subst m,y,$(CONFIG_SPI_SPIDEV)) += platform_mrfld_spidev.o
 # I2C Devices
 obj-$(subst m,y,$(CONFIG_SENSORS_EMC1403)) += platform_emc1403.o
 obj-$(subst m,y,$(CONFIG_SENSORS_LIS3LV02D)) += platform_lis331.o
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_spidev.c b/arch/x86/platform/intel-mid/device_libs/platform_mrfld_spidev.c
similarity index 91%
rename from arch/x86/platform/intel-mid/device_libs/platform_spidev.c
rename to arch/x86/platform/intel-mid/device_libs/platform_mrfld_spidev.c
index 30c601b399ee..27186ad654c9 100644
--- a/arch/x86/platform/intel-mid/device_libs/platform_spidev.c
+++ b/arch/x86/platform/intel-mid/device_libs/platform_mrfld_spidev.c
@@ -11,6 +11,7 @@
  * of the License.
  */
 
+#include <linux/err.h>
 #include <linux/init.h>
 #include <linux/sfi.h>
 #include <linux/spi/pxa2xx_spi.h>
@@ -34,6 +35,9 @@ static void __init *spidev_platform_data(void *info)
 {
 	struct spi_board_info *spi_info = info;
 
+	if (intel_mid_identify_cpu() != INTEL_MID_CPU_CHIP_TANGIER)
+		return ERR_PTR(-ENODEV);
+
 	spi_info->mode = SPI_MODE_0;
 	spi_info->controller_data = &spidev_spi_chip;
 

^ permalink raw reply	[flat|nested] 539+ messages in thread

* [GIT PULL] x86 fixes
@ 2016-12-23 22:57 Ingo Molnar
  0 siblings, 0 replies; 539+ messages in thread
From: Ingo Molnar @ 2016-12-23 22:57 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Thomas Gleixner, H. Peter Anvin, Peter Zijlstra,
	Borislav Petkov, Andrew Morton

Linus,

Please pull the latest x86-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus

   # HEAD: c280f7736ab26a601932b1ce017a3840dbedcfdc Revert "x86/unwind: Detect bad stack return address"

There's a number of fixes:

 - a round of fixes for CPUID-less legacy CPUs
 - a number of microcode loader fixes
 - i8042 detection robustization fixes
 - stack dump/unwinder fixes
 - x86 SoC platform driver fixes
 - a GCC 7 warning fix
 - virtualization related fixes

  out-of-topic modifications in x86-urgent-for-linus:
  -----------------------------------------------------
  drivers/input/serio/i8042-x86ia64io.h# d79e141c1c6e: Input: i8042 - Trust firmwar

 Thanks,

	Ingo

------------------>
Andy Lutomirski (5):
      x86/cpu: Probe CPUID leaf 6 even when cpuid_level == 6
      x86/asm/32: Make sync_core() handle missing CPUID on all 32-bit kernels
      Revert "x86/boot: Fail the boot if !M486 and CPUID is missing"
      x86/microcode/intel: Replace sync_core() with native_cpuid()
      x86/asm: Rewrite sync_core() to use IRET-to-self

Boris Ostrovsky (1):
      x86/microcode/AMD: Use native_cpuid() in load_ucode_amd_bsp()

Borislav Petkov (6):
      x86/microcode/AMD: Make find_proper_container() sane again
      x86/microcode/AMD: Sanitize apply_microcode_early_amd()
      x86/microcode/AMD: Do not load when running on a hypervisor
      x86/topology: Document cpu_llc_id
      x86/alternatives: Do not use sync_core() to serialize I$
      x86/microcode/AMD: Reload proper initrd start address

Dmitry Torokhov (4):
      x86/init: Add i8042 state to the platform data
      Input: i8042 - Trust firmware a bit more when probing on X86
      x86/init: Remove i8042_detect() from platform ops
      x86/init: Fix a couple of comment typos

Josh Poimboeuf (3):
      x86/unwind: Adjust last frame check for aligned function stacks
      x86/unwind: Dump stack data on warnings
      Revert "x86/unwind: Detect bad stack return address"

Linus Walleij (1):
      x86/platform/intel-mid: Switch MPU3050 driver to IIO

Markus Trippelsdorf (1):
      x86/tools: Fix gcc-7 warning in relocs.c

Nicolas Iooss (1):
      x86/platform/intel/quark: Add printf attribute to imr_self_test_result()

Peter Zijlstra (1):
      x86/paravirt: Mark unused patch_default label

Vitaly Kuznetsov (1):
      x86/hyperv: Handle unknown NMIs on one CPU when unknown_nmi_panic


 Documentation/x86/topology.txt                   |  9 +++
 arch/x86/boot/cpu.c                              |  6 --
 arch/x86/include/asm/processor.h                 | 80 +++++++++++++++++-------
 arch/x86/include/asm/unwind.h                    |  2 +-
 arch/x86/include/asm/x86_init.h                  | 26 ++++++--
 arch/x86/kernel/acpi/boot.c                      |  7 +++
 arch/x86/kernel/alternative.c                    | 15 +++--
 arch/x86/kernel/cpu/common.c                     |  7 ++-
 arch/x86/kernel/cpu/microcode/amd.c              | 56 ++++++++++-------
 arch/x86/kernel/cpu/microcode/core.c             | 40 +++++++-----
 arch/x86/kernel/cpu/microcode/intel.c            | 26 +++++++-
 arch/x86/kernel/cpu/mshyperv.c                   | 24 +++++++
 arch/x86/kernel/paravirt_patch_32.c              |  2 +-
 arch/x86/kernel/paravirt_patch_64.c              |  2 +-
 arch/x86/kernel/platform-quirks.c                |  5 ++
 arch/x86/kernel/unwind_frame.c                   | 56 ++++++++++++++---
 arch/x86/kernel/x86_init.c                       |  2 -
 arch/x86/platform/ce4100/ce4100.c                |  6 --
 arch/x86/platform/intel-mid/device_libs/Makefile |  2 +-
 arch/x86/platform/intel-mid/intel-mid.c          |  7 ---
 arch/x86/platform/intel-quark/imr_selftest.c     |  3 +-
 arch/x86/tools/relocs.c                          |  3 +-
 drivers/input/serio/i8042-x86ia64io.h            | 10 ++-
 23 files changed, 279 insertions(+), 117 deletions(-)

diff --git a/Documentation/x86/topology.txt b/Documentation/x86/topology.txt
index 06afac252f5b..f3e9d7e9ed6c 100644
--- a/Documentation/x86/topology.txt
+++ b/Documentation/x86/topology.txt
@@ -63,6 +63,15 @@ historical nature and should be cleaned up.
     The maximum possible number of packages in the system. Helpful for per
     package facilities to preallocate per package information.
 
+  - cpu_llc_id:
+
+    A per-CPU variable containing:
+    - On Intel, the first APIC ID of the list of CPUs sharing the Last Level
+    Cache
+
+    - On AMD, the Node ID or Core Complex ID containing the Last Level
+    Cache. In general, it is a number identifying an LLC uniquely on the
+    system.
 
 * Cores:
 
diff --git a/arch/x86/boot/cpu.c b/arch/x86/boot/cpu.c
index 4224ede43b4e..26240dde081e 100644
--- a/arch/x86/boot/cpu.c
+++ b/arch/x86/boot/cpu.c
@@ -87,12 +87,6 @@ int validate_cpu(void)
 		return -1;
 	}
 
-	if (CONFIG_X86_MINIMUM_CPU_FAMILY <= 4 && !IS_ENABLED(CONFIG_M486) &&
-	    !has_eflag(X86_EFLAGS_ID)) {
-		printf("This kernel requires a CPU with the CPUID instruction.  Build with CONFIG_M486=y to run on this CPU.\n");
-		return -1;
-	}
-
 	if (err_flags) {
 		puts("This kernel requires the following features "
 		     "not present on the CPU:\n");
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 6aa741fbe1df..eaf100508c36 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -602,33 +602,69 @@ static __always_inline void cpu_relax(void)
 	rep_nop();
 }
 
-/* Stop speculative execution and prefetching of modified code. */
+/*
+ * This function forces the icache and prefetched instruction stream to
+ * catch up with reality in two very specific cases:
+ *
+ *  a) Text was modified using one virtual address and is about to be executed
+ *     from the same physical page at a different virtual address.
+ *
+ *  b) Text was modified on a different CPU, may subsequently be
+ *     executed on this CPU, and you want to make sure the new version
+ *     gets executed.  This generally means you're calling this in a IPI.
+ *
+ * If you're calling this for a different reason, you're probably doing
+ * it wrong.
+ */
 static inline void sync_core(void)
 {
-	int tmp;
-
-#ifdef CONFIG_M486
 	/*
-	 * Do a CPUID if available, otherwise do a jump.  The jump
-	 * can conveniently enough be the jump around CPUID.
+	 * There are quite a few ways to do this.  IRET-to-self is nice
+	 * because it works on every CPU, at any CPL (so it's compatible
+	 * with paravirtualization), and it never exits to a hypervisor.
+	 * The only down sides are that it's a bit slow (it seems to be
+	 * a bit more than 2x slower than the fastest options) and that
+	 * it unmasks NMIs.  The "push %cs" is needed because, in
+	 * paravirtual environments, __KERNEL_CS may not be a valid CS
+	 * value when we do IRET directly.
+	 *
+	 * In case NMI unmasking or performance ever becomes a problem,
+	 * the next best option appears to be MOV-to-CR2 and an
+	 * unconditional jump.  That sequence also works on all CPUs,
+	 * but it will fault at CPL3 (i.e. Xen PV and lguest).
+	 *
+	 * CPUID is the conventional way, but it's nasty: it doesn't
+	 * exist on some 486-like CPUs, and it usually exits to a
+	 * hypervisor.
+	 *
+	 * Like all of Linux's memory ordering operations, this is a
+	 * compiler barrier as well.
 	 */
-	asm volatile("cmpl %2,%1\n\t"
-		     "jl 1f\n\t"
-		     "cpuid\n"
-		     "1:"
-		     : "=a" (tmp)
-		     : "rm" (boot_cpu_data.cpuid_level), "ri" (0), "0" (1)
-		     : "ebx", "ecx", "edx", "memory");
+	register void *__sp asm(_ASM_SP);
+
+#ifdef CONFIG_X86_32
+	asm volatile (
+		"pushfl\n\t"
+		"pushl %%cs\n\t"
+		"pushl $1f\n\t"
+		"iret\n\t"
+		"1:"
+		: "+r" (__sp) : : "memory");
 #else
-	/*
-	 * CPUID is a barrier to speculative execution.
-	 * Prefetched instructions are automatically
-	 * invalidated when modified.
-	 */
-	asm volatile("cpuid"
-		     : "=a" (tmp)
-		     : "0" (1)
-		     : "ebx", "ecx", "edx", "memory");
+	unsigned int tmp;
+
+	asm volatile (
+		"mov %%ss, %0\n\t"
+		"pushq %q0\n\t"
+		"pushq %%rsp\n\t"
+		"addq $8, (%%rsp)\n\t"
+		"pushfq\n\t"
+		"mov %%cs, %0\n\t"
+		"pushq %q0\n\t"
+		"pushq $1f\n\t"
+		"iretq\n\t"
+		"1:"
+		: "=&r" (tmp), "+r" (__sp) : : "cc", "memory");
 #endif
 }
 
diff --git a/arch/x86/include/asm/unwind.h b/arch/x86/include/asm/unwind.h
index c5a7f3a930dd..6fa75b17aec3 100644
--- a/arch/x86/include/asm/unwind.h
+++ b/arch/x86/include/asm/unwind.h
@@ -12,7 +12,7 @@ struct unwind_state {
 	struct task_struct *task;
 	int graph_idx;
 #ifdef CONFIG_FRAME_POINTER
-	unsigned long *bp;
+	unsigned long *bp, *orig_sp;
 	struct pt_regs *regs;
 #else
 	unsigned long *sp;
diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h
index 6ba793178441..7ba7e90a9ad6 100644
--- a/arch/x86/include/asm/x86_init.h
+++ b/arch/x86/include/asm/x86_init.h
@@ -59,7 +59,7 @@ struct x86_init_irqs {
 
 /**
  * struct x86_init_oem - oem platform specific customizing functions
- * @arch_setup:			platform specific architecure setup
+ * @arch_setup:			platform specific architecture setup
  * @banner:			print a platform specific banner
  */
 struct x86_init_oem {
@@ -165,8 +165,25 @@ struct x86_legacy_devices {
 };
 
 /**
+ * enum x86_legacy_i8042_state - i8042 keyboard controller state
+ * @X86_LEGACY_I8042_PLATFORM_ABSENT: the controller is always absent on
+ *	given platform/subarch.
+ * @X86_LEGACY_I8042_FIRMWARE_ABSENT: firmware reports that the controller
+ *	is absent.
+ * @X86_LEGACY_i8042_EXPECTED_PRESENT: the controller is likely to be
+ *	present, the i8042 driver should probe for controller existence.
+ */
+enum x86_legacy_i8042_state {
+	X86_LEGACY_I8042_PLATFORM_ABSENT,
+	X86_LEGACY_I8042_FIRMWARE_ABSENT,
+	X86_LEGACY_I8042_EXPECTED_PRESENT,
+};
+
+/**
  * struct x86_legacy_features - legacy x86 features
  *
+ * @i8042: indicated if we expect the device to have i8042 controller
+ *	present.
  * @rtc: this device has a CMOS real-time clock present
  * @reserve_bios_regions: boot code will search for the EBDA address and the
  * 	start of the 640k - 1M BIOS region.  If false, the platform must
@@ -175,6 +192,7 @@ struct x86_legacy_devices {
  * 	documentation for further details.
  */
 struct x86_legacy_features {
+	enum x86_legacy_i8042_state i8042;
 	int rtc;
 	int reserve_bios_regions;
 	struct x86_legacy_devices devices;
@@ -188,15 +206,14 @@ struct x86_legacy_features {
  * @set_wallclock:		set time back to HW clock
  * @is_untracked_pat_range	exclude from PAT logic
  * @nmi_init			enable NMI on cpus
- * @i8042_detect		pre-detect if i8042 controller exists
  * @save_sched_clock_state:	save state for sched_clock() on suspend
  * @restore_sched_clock_state:	restore state for sched_clock() on resume
- * @apic_post_init:		adjust apic if neeeded
+ * @apic_post_init:		adjust apic if needed
  * @legacy:			legacy features
  * @set_legacy_features:	override legacy features. Use of this callback
  * 				is highly discouraged. You should only need
  * 				this if your hardware platform requires further
- * 				custom fine tuning far beyong what may be
+ * 				custom fine tuning far beyond what may be
  * 				possible in x86_early_init_platform_quirks() by
  * 				only using the current x86_hardware_subarch
  * 				semantics.
@@ -210,7 +227,6 @@ struct x86_platform_ops {
 	bool (*is_untracked_pat_range)(u64 start, u64 end);
 	void (*nmi_init)(void);
 	unsigned char (*get_nmi_reason)(void);
-	int (*i8042_detect)(void);
 	void (*save_sched_clock_state)(void);
 	void (*restore_sched_clock_state)(void);
 	void (*apic_post_init)(void);
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 6f65b0eed384..64422f850e95 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -930,6 +930,13 @@ static int __init acpi_parse_fadt(struct acpi_table_header *table)
 		x86_platform.legacy.devices.pnpbios = 0;
 	}
 
+	if (acpi_gbl_FADT.header.revision >= FADT2_REVISION_ID &&
+	    !(acpi_gbl_FADT.boot_flags & ACPI_FADT_8042) &&
+	    x86_platform.legacy.i8042 != X86_LEGACY_I8042_PLATFORM_ABSENT) {
+		pr_debug("ACPI: i8042 controller is absent\n");
+		x86_platform.legacy.i8042 = X86_LEGACY_I8042_FIRMWARE_ABSENT;
+	}
+
 	if (acpi_gbl_FADT.boot_flags & ACPI_FADT_NO_CMOS_RTC) {
 		pr_debug("ACPI: not registering RTC platform device\n");
 		x86_platform.legacy.rtc = 0;
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 5cb272a7a5a3..c5b8f760473c 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -337,7 +337,11 @@ recompute_jump(struct alt_instr *a, u8 *orig_insn, u8 *repl_insn, u8 *insnbuf)
 		n_dspl, (unsigned long)orig_insn + n_dspl + repl_len);
 }
 
-static void __init_or_module optimize_nops(struct alt_instr *a, u8 *instr)
+/*
+ * "noinline" to cause control flow change and thus invalidate I$ and
+ * cause refetch after modification.
+ */
+static void __init_or_module noinline optimize_nops(struct alt_instr *a, u8 *instr)
 {
 	unsigned long flags;
 
@@ -346,7 +350,6 @@ static void __init_or_module optimize_nops(struct alt_instr *a, u8 *instr)
 
 	local_irq_save(flags);
 	add_nops(instr + (a->instrlen - a->padlen), a->padlen);
-	sync_core();
 	local_irq_restore(flags);
 
 	DUMP_BYTES(instr, a->instrlen, "%p: [%d:%d) optimized NOPs: ",
@@ -359,9 +362,12 @@ static void __init_or_module optimize_nops(struct alt_instr *a, u8 *instr)
  * This implies that asymmetric systems where APs have less capabilities than
  * the boot processor are not handled. Tough. Make sure you disable such
  * features by hand.
+ *
+ * Marked "noinline" to cause control flow change and thus insn cache
+ * to refetch changed I$ lines.
  */
-void __init_or_module apply_alternatives(struct alt_instr *start,
-					 struct alt_instr *end)
+void __init_or_module noinline apply_alternatives(struct alt_instr *start,
+						  struct alt_instr *end)
 {
 	struct alt_instr *a;
 	u8 *instr, *replacement;
@@ -667,7 +673,6 @@ void *__init_or_module text_poke_early(void *addr, const void *opcode,
 	unsigned long flags;
 	local_irq_save(flags);
 	memcpy(addr, opcode, len);
-	sync_core();
 	local_irq_restore(flags);
 	/* Could also do a CLFLUSH here to speed up CPU recovery; but
 	   that causes hangs on some VIA CPUs. */
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 1f6b50a449ab..dc1697ca5191 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -667,13 +667,14 @@ void get_cpu_cap(struct cpuinfo_x86 *c)
 		c->x86_capability[CPUID_1_EDX] = edx;
 	}
 
+	/* Thermal and Power Management Leaf: level 0x00000006 (eax) */
+	if (c->cpuid_level >= 0x00000006)
+		c->x86_capability[CPUID_6_EAX] = cpuid_eax(0x00000006);
+
 	/* Additional Intel-defined flags: level 0x00000007 */
 	if (c->cpuid_level >= 0x00000007) {
 		cpuid_count(0x00000007, 0, &eax, &ebx, &ecx, &edx);
-
 		c->x86_capability[CPUID_7_0_EBX] = ebx;
-
-		c->x86_capability[CPUID_6_EAX] = cpuid_eax(0x00000006);
 		c->x86_capability[CPUID_7_ECX] = ecx;
 	}
 
diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c
index 6f353bdb3a25..6a31e2691f3a 100644
--- a/arch/x86/kernel/cpu/microcode/amd.c
+++ b/arch/x86/kernel/cpu/microcode/amd.c
@@ -116,10 +116,11 @@ static inline u16 find_equiv_id(struct equiv_cpu_entry *equiv_cpu_table,
 
 /*
  * This scans the ucode blob for the proper container as we can have multiple
- * containers glued together.
+ * containers glued together. Returns the equivalence ID from the equivalence
+ * table or 0 if none found.
  */
-static struct container
-find_proper_container(u8 *ucode, size_t size, u16 *ret_id)
+static u16
+find_proper_container(u8 *ucode, size_t size, struct container *ret_cont)
 {
 	struct container ret = { NULL, 0 };
 	u32 eax, ebx, ecx, edx;
@@ -138,7 +139,7 @@ find_proper_container(u8 *ucode, size_t size, u16 *ret_id)
 	if (header[0] != UCODE_MAGIC ||
 	    header[1] != UCODE_EQUIV_CPU_TABLE_TYPE || /* type */
 	    header[2] == 0)                            /* size */
-		return ret;
+		return eq_id;
 
 	eax = 0x00000001;
 	ecx = 0;
@@ -163,8 +164,9 @@ find_proper_container(u8 *ucode, size_t size, u16 *ret_id)
 			 * ucode update loop below
 			 */
 			left = ret.size - offset;
-			*ret_id = eq_id;
-			return ret;
+
+			*ret_cont = ret;
+			return eq_id;
 		}
 
 		/*
@@ -189,7 +191,7 @@ find_proper_container(u8 *ucode, size_t size, u16 *ret_id)
 		ucode     = data;
 	}
 
-	return ret;
+	return eq_id;
 }
 
 static int __apply_microcode_amd(struct microcode_amd *mc_amd)
@@ -214,17 +216,18 @@ static int __apply_microcode_amd(struct microcode_amd *mc_amd)
  * and on 32-bit during save_microcode_in_initrd_amd() -- we can call
  * load_microcode_amd() to save equivalent cpu table and microcode patches in
  * kernel heap memory.
+ *
+ * Returns true if container found (sets @ret_cont), false otherwise.
  */
-static struct container
-apply_microcode_early_amd(void *ucode, size_t size, bool save_patch)
+static bool apply_microcode_early_amd(void *ucode, size_t size, bool save_patch,
+				      struct container *ret_cont)
 {
-	struct container ret = { NULL, 0 };
 	u8 (*patch)[PATCH_MAX_SIZE];
+	u32 rev, *header, *new_rev;
+	struct container ret;
 	int offset, left;
-	u32 rev, *header;
-	u8  *data;
 	u16 eq_id = 0;
-	u32 *new_rev;
+	u8  *data;
 
 #ifdef CONFIG_X86_32
 	new_rev = (u32 *)__pa_nodebug(&ucode_new_rev);
@@ -235,11 +238,11 @@ apply_microcode_early_amd(void *ucode, size_t size, bool save_patch)
 #endif
 
 	if (check_current_patch_level(&rev, true))
-		return (struct container){ NULL, 0 };
+		return false;
 
-	ret = find_proper_container(ucode, size, &eq_id);
+	eq_id = find_proper_container(ucode, size, &ret);
 	if (!eq_id)
-		return (struct container){ NULL, 0 };
+		return false;
 
 	this_equiv_id = eq_id;
 	header = (u32 *)ret.data;
@@ -273,7 +276,11 @@ apply_microcode_early_amd(void *ucode, size_t size, bool save_patch)
 		data   += offset;
 		left   -= offset;
 	}
-	return ret;
+
+	if (ret_cont)
+		*ret_cont = ret;
+
+	return true;
 }
 
 static bool get_builtin_microcode(struct cpio_data *cp, unsigned int family)
@@ -294,6 +301,7 @@ static bool get_builtin_microcode(struct cpio_data *cp, unsigned int family)
 void __init load_ucode_amd_bsp(unsigned int family)
 {
 	struct ucode_cpu_info *uci;
+	u32 eax, ebx, ecx, edx;
 	struct cpio_data cp;
 	const char *path;
 	bool use_pa;
@@ -315,9 +323,12 @@ void __init load_ucode_amd_bsp(unsigned int family)
 		return;
 
 	/* Get BSP's CPUID.EAX(1), needed in load_microcode_amd() */
-	uci->cpu_sig.sig = cpuid_eax(1);
+	eax = 1;
+	ecx = 0;
+	native_cpuid(&eax, &ebx, &ecx, &edx);
+	uci->cpu_sig.sig = eax;
 
-	apply_microcode_early_amd(cp.data, cp.size, true);
+	apply_microcode_early_amd(cp.data, cp.size, true, NULL);
 }
 
 #ifdef CONFIG_X86_32
@@ -349,7 +360,7 @@ void load_ucode_amd_ap(unsigned int family)
 	 * This would set amd_ucode_patch above so that the following APs can
 	 * use it directly instead of going down this path again.
 	 */
-	apply_microcode_early_amd(cp.data, cp.size, true);
+	apply_microcode_early_amd(cp.data, cp.size, true, NULL);
 }
 #else
 void load_ucode_amd_ap(unsigned int family)
@@ -387,8 +398,7 @@ void load_ucode_amd_ap(unsigned int family)
 			}
 		}
 
-		cont = apply_microcode_early_amd(cp.data, cp.size, false);
-		if (!(cont.data && cont.size)) {
+		if (!apply_microcode_early_amd(cp.data, cp.size, false, &cont)) {
 			cont.size = -1;
 			return;
 		}
@@ -443,7 +453,7 @@ int __init save_microcode_in_initrd_amd(unsigned int fam)
 				return -EINVAL;
 			}
 
-			cont = find_proper_container(cp.data, cp.size, &eq_id);
+			eq_id = find_proper_container(cp.data, cp.size, &cont);
 			if (!eq_id) {
 				cont.size = -1;
 				return -EINVAL;
diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c
index 6996413c78c3..2af69d27da62 100644
--- a/arch/x86/kernel/cpu/microcode/core.c
+++ b/arch/x86/kernel/cpu/microcode/core.c
@@ -44,7 +44,7 @@
 #define DRIVER_VERSION	"2.2"
 
 static struct microcode_ops	*microcode_ops;
-static bool dis_ucode_ldr;
+static bool dis_ucode_ldr = true;
 
 LIST_HEAD(microcode_cache);
 
@@ -76,6 +76,7 @@ struct cpu_info_ctx {
 static bool __init check_loader_disabled_bsp(void)
 {
 	static const char *__dis_opt_str = "dis_ucode_ldr";
+	u32 a, b, c, d;
 
 #ifdef CONFIG_X86_32
 	const char *cmdline = (const char *)__pa_nodebug(boot_command_line);
@@ -88,8 +89,23 @@ static bool __init check_loader_disabled_bsp(void)
 	bool *res = &dis_ucode_ldr;
 #endif
 
-	if (cmdline_find_option_bool(cmdline, option))
-		*res = true;
+	if (!have_cpuid_p())
+		return *res;
+
+	a = 1;
+	c = 0;
+	native_cpuid(&a, &b, &c, &d);
+
+	/*
+	 * CPUID(1).ECX[31]: reserved for hypervisor use. This is still not
+	 * completely accurate as xen pv guests don't see that CPUID bit set but
+	 * that's good enough as they don't land on the BSP path anyway.
+	 */
+	if (c & BIT(31))
+		return *res;
+
+	if (cmdline_find_option_bool(cmdline, option) <= 0)
+		*res = false;
 
 	return *res;
 }
@@ -121,9 +137,6 @@ void __init load_ucode_bsp(void)
 	if (check_loader_disabled_bsp())
 		return;
 
-	if (!have_cpuid_p())
-		return;
-
 	vendor = x86_cpuid_vendor();
 	family = x86_cpuid_family();
 
@@ -157,9 +170,6 @@ void load_ucode_ap(void)
 	if (check_loader_disabled_ap())
 		return;
 
-	if (!have_cpuid_p())
-		return;
-
 	vendor = x86_cpuid_vendor();
 	family = x86_cpuid_family();
 
@@ -233,14 +243,12 @@ struct cpio_data find_microcode_in_initrd(const char *path, bool use_pa)
 # endif
 
 	/*
-	 * Did we relocate the ramdisk?
-	 *
-	 * So we possibly relocate the ramdisk *after* applying microcode on the
-	 * BSP so we rely on use_pa (use physical addresses) - even if it is not
-	 * absolutely correct - to determine whether we've done the ramdisk
-	 * relocation already.
+	 * Fixup the start address: after reserve_initrd() runs, initrd_start
+	 * has the virtual address of the beginning of the initrd. It also
+	 * possibly relocates the ramdisk. In either case, initrd_start contains
+	 * the updated address so use that instead.
 	 */
-	if (!use_pa && relocated_ramdisk)
+	if (!use_pa && initrd_start)
 		start = initrd_start;
 
 	return find_cpio_data(path, (void *)start, size, NULL);
diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c
index 54d50c3694d8..b624b54912e1 100644
--- a/arch/x86/kernel/cpu/microcode/intel.c
+++ b/arch/x86/kernel/cpu/microcode/intel.c
@@ -368,6 +368,26 @@ scan_microcode(void *data, size_t size, struct ucode_cpu_info *uci, bool save)
 	return patch;
 }
 
+static void cpuid_1(void)
+{
+	/*
+	 * According to the Intel SDM, Volume 3, 9.11.7:
+	 *
+	 *   CPUID returns a value in a model specific register in
+	 *   addition to its usual register return values. The
+	 *   semantics of CPUID cause it to deposit an update ID value
+	 *   in the 64-bit model-specific register at address 08BH
+	 *   (IA32_BIOS_SIGN_ID). If no update is present in the
+	 *   processor, the value in the MSR remains unmodified.
+	 *
+	 * Use native_cpuid -- this code runs very early and we don't
+	 * want to mess with paravirt.
+	 */
+	unsigned int eax = 1, ebx, ecx = 0, edx;
+
+	native_cpuid(&eax, &ebx, &ecx, &edx);
+}
+
 static int collect_cpu_info_early(struct ucode_cpu_info *uci)
 {
 	unsigned int val[2];
@@ -393,7 +413,7 @@ static int collect_cpu_info_early(struct ucode_cpu_info *uci)
 	native_wrmsrl(MSR_IA32_UCODE_REV, 0);
 
 	/* As documented in the SDM: Do a CPUID 1 here */
-	sync_core();
+	cpuid_1();
 
 	/* get the current revision from MSR 0x8B */
 	native_rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]);
@@ -593,7 +613,7 @@ static int apply_microcode_early(struct ucode_cpu_info *uci, bool early)
 	native_wrmsrl(MSR_IA32_UCODE_REV, 0);
 
 	/* As documented in the SDM: Do a CPUID 1 here */
-	sync_core();
+	cpuid_1();
 
 	/* get the current revision from MSR 0x8B */
 	native_rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]);
@@ -805,7 +825,7 @@ static int apply_microcode_intel(int cpu)
 	wrmsrl(MSR_IA32_UCODE_REV, 0);
 
 	/* As documented in the SDM: Do a CPUID 1 here */
-	sync_core();
+	cpuid_1();
 
 	/* get the current revision from MSR 0x8B */
 	rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]);
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index 6c044543545e..f37e02e41a77 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -30,6 +30,7 @@
 #include <asm/apic.h>
 #include <asm/timer.h>
 #include <asm/reboot.h>
+#include <asm/nmi.h>
 
 struct ms_hyperv_info ms_hyperv;
 EXPORT_SYMBOL_GPL(ms_hyperv);
@@ -157,6 +158,26 @@ static unsigned char hv_get_nmi_reason(void)
 	return 0;
 }
 
+#ifdef CONFIG_X86_LOCAL_APIC
+/*
+ * Prior to WS2016 Debug-VM sends NMIs to all CPUs which makes
+ * it dificult to process CHANNELMSG_UNLOAD in case of crash. Handle
+ * unknown NMI on the first CPU which gets it.
+ */
+static int hv_nmi_unknown(unsigned int val, struct pt_regs *regs)
+{
+	static atomic_t nmi_cpu = ATOMIC_INIT(-1);
+
+	if (!unknown_nmi_panic)
+		return NMI_DONE;
+
+	if (atomic_cmpxchg(&nmi_cpu, -1, raw_smp_processor_id()) != -1)
+		return NMI_HANDLED;
+
+	return NMI_DONE;
+}
+#endif
+
 static void __init ms_hyperv_init_platform(void)
 {
 	/*
@@ -182,6 +203,9 @@ static void __init ms_hyperv_init_platform(void)
 		pr_info("HyperV: LAPIC Timer Frequency: %#x\n",
 			lapic_timer_frequency);
 	}
+
+	register_nmi_handler(NMI_UNKNOWN, hv_nmi_unknown, NMI_FLAG_FIRST,
+			     "hv_nmi_unknown");
 #endif
 
 	if (ms_hyperv.features & HV_X64_MSR_TIME_REF_COUNT_AVAILABLE)
diff --git a/arch/x86/kernel/paravirt_patch_32.c b/arch/x86/kernel/paravirt_patch_32.c
index d33ef165b1f8..553acbbb4d32 100644
--- a/arch/x86/kernel/paravirt_patch_32.c
+++ b/arch/x86/kernel/paravirt_patch_32.c
@@ -68,7 +68,7 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
 #endif
 
 	default:
-patch_default:
+patch_default: __maybe_unused
 		ret = paravirt_patch_default(type, clobbers, ibuf, addr, len);
 		break;
 
diff --git a/arch/x86/kernel/paravirt_patch_64.c b/arch/x86/kernel/paravirt_patch_64.c
index f4fcf26c9fce..11aaf1eaa0e4 100644
--- a/arch/x86/kernel/paravirt_patch_64.c
+++ b/arch/x86/kernel/paravirt_patch_64.c
@@ -80,7 +80,7 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
 #endif
 
 	default:
-patch_default:
+patch_default: __maybe_unused
 		ret = paravirt_patch_default(type, clobbers, ibuf, addr, len);
 		break;
 
diff --git a/arch/x86/kernel/platform-quirks.c b/arch/x86/kernel/platform-quirks.c
index 24a50301f150..91271122f0df 100644
--- a/arch/x86/kernel/platform-quirks.c
+++ b/arch/x86/kernel/platform-quirks.c
@@ -6,6 +6,7 @@
 
 void __init x86_early_init_platform_quirks(void)
 {
+	x86_platform.legacy.i8042 = X86_LEGACY_I8042_EXPECTED_PRESENT;
 	x86_platform.legacy.rtc = 1;
 	x86_platform.legacy.reserve_bios_regions = 0;
 	x86_platform.legacy.devices.pnpbios = 1;
@@ -16,10 +17,14 @@ void __init x86_early_init_platform_quirks(void)
 		break;
 	case X86_SUBARCH_XEN:
 	case X86_SUBARCH_LGUEST:
+		x86_platform.legacy.devices.pnpbios = 0;
+		x86_platform.legacy.rtc = 0;
+		break;
 	case X86_SUBARCH_INTEL_MID:
 	case X86_SUBARCH_CE4100:
 		x86_platform.legacy.devices.pnpbios = 0;
 		x86_platform.legacy.rtc = 0;
+		x86_platform.legacy.i8042 = X86_LEGACY_I8042_PLATFORM_ABSENT;
 		break;
 	}
 
diff --git a/arch/x86/kernel/unwind_frame.c b/arch/x86/kernel/unwind_frame.c
index ea7b7f9a3b9e..4443e499f279 100644
--- a/arch/x86/kernel/unwind_frame.c
+++ b/arch/x86/kernel/unwind_frame.c
@@ -6,6 +6,37 @@
 
 #define FRAME_HEADER_SIZE (sizeof(long) * 2)
 
+static void unwind_dump(struct unwind_state *state, unsigned long *sp)
+{
+	static bool dumped_before = false;
+	bool prev_zero, zero = false;
+	unsigned long word;
+
+	if (dumped_before)
+		return;
+
+	dumped_before = true;
+
+	printk_deferred("unwind stack type:%d next_sp:%p mask:%lx graph_idx:%d\n",
+			state->stack_info.type, state->stack_info.next_sp,
+			state->stack_mask, state->graph_idx);
+
+	for (sp = state->orig_sp; sp < state->stack_info.end; sp++) {
+		word = READ_ONCE_NOCHECK(*sp);
+
+		prev_zero = zero;
+		zero = word == 0;
+
+		if (zero) {
+			if (!prev_zero)
+				printk_deferred("%p: %016x ...\n", sp, 0);
+			continue;
+		}
+
+		printk_deferred("%p: %016lx (%pB)\n", sp, word, (void *)word);
+	}
+}
+
 unsigned long unwind_get_return_address(struct unwind_state *state)
 {
 	unsigned long addr;
@@ -20,15 +51,7 @@ unsigned long unwind_get_return_address(struct unwind_state *state)
 	addr = ftrace_graph_ret_addr(state->task, &state->graph_idx, *addr_p,
 				     addr_p);
 
-	if (!__kernel_text_address(addr)) {
-		printk_deferred_once(KERN_WARNING
-			"WARNING: unrecognized kernel stack return address %p at %p in %s:%d\n",
-			(void *)addr, addr_p, state->task->comm,
-			state->task->pid);
-		return 0;
-	}
-
-	return addr;
+	return __kernel_text_address(addr) ? addr : 0;
 }
 EXPORT_SYMBOL_GPL(unwind_get_return_address);
 
@@ -46,7 +69,14 @@ static bool is_last_task_frame(struct unwind_state *state)
 	unsigned long bp = (unsigned long)state->bp;
 	unsigned long regs = (unsigned long)task_pt_regs(state->task);
 
-	return bp == regs - FRAME_HEADER_SIZE;
+	/*
+	 * We have to check for the last task frame at two different locations
+	 * because gcc can occasionally decide to realign the stack pointer and
+	 * change the offset of the stack frame by a word in the prologue of a
+	 * function called by head/entry code.
+	 */
+	return bp == regs - FRAME_HEADER_SIZE ||
+	       bp == regs - FRAME_HEADER_SIZE - sizeof(long);
 }
 
 /*
@@ -67,6 +97,7 @@ static bool update_stack_state(struct unwind_state *state, void *addr,
 			       size_t len)
 {
 	struct stack_info *info = &state->stack_info;
+	enum stack_type orig_type = info->type;
 
 	/*
 	 * If addr isn't on the current stack, switch to the next one.
@@ -80,6 +111,9 @@ static bool update_stack_state(struct unwind_state *state, void *addr,
 				   &state->stack_mask))
 			return false;
 
+	if (!state->orig_sp || info->type != orig_type)
+		state->orig_sp = addr;
+
 	return true;
 }
 
@@ -178,11 +212,13 @@ bool unwind_next_frame(struct unwind_state *state)
 			"WARNING: kernel stack regs at %p in %s:%d has bad 'bp' value %p\n",
 			state->regs, state->task->comm,
 			state->task->pid, next_frame);
+		unwind_dump(state, (unsigned long *)state->regs);
 	} else {
 		printk_deferred_once(KERN_WARNING
 			"WARNING: kernel stack frame pointer at %p in %s:%d has bad value %p\n",
 			state->bp, state->task->comm,
 			state->task->pid, next_frame);
+		unwind_dump(state, state->bp);
 	}
 the_end:
 	state->stack_info.type = STACK_TYPE_UNKNOWN;
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
index 0bd9f1287f39..11a93f005268 100644
--- a/arch/x86/kernel/x86_init.c
+++ b/arch/x86/kernel/x86_init.c
@@ -89,7 +89,6 @@ struct x86_cpuinit_ops x86_cpuinit = {
 };
 
 static void default_nmi_init(void) { };
-static int default_i8042_detect(void) { return 1; };
 
 struct x86_platform_ops x86_platform __ro_after_init = {
 	.calibrate_cpu			= native_calibrate_cpu,
@@ -100,7 +99,6 @@ struct x86_platform_ops x86_platform __ro_after_init = {
 	.is_untracked_pat_range		= is_ISA_range,
 	.nmi_init			= default_nmi_init,
 	.get_nmi_reason			= default_get_nmi_reason,
-	.i8042_detect			= default_i8042_detect,
 	.save_sched_clock_state 	= tsc_save_sched_clock_state,
 	.restore_sched_clock_state 	= tsc_restore_sched_clock_state,
 };
diff --git a/arch/x86/platform/ce4100/ce4100.c b/arch/x86/platform/ce4100/ce4100.c
index 821cb41f00e6..ce4b06733c09 100644
--- a/arch/x86/platform/ce4100/ce4100.c
+++ b/arch/x86/platform/ce4100/ce4100.c
@@ -23,11 +23,6 @@
 #include <asm/io_apic.h>
 #include <asm/emergency-restart.h>
 
-static int ce4100_i8042_detect(void)
-{
-	return 0;
-}
-
 /*
  * The CE4100 platform has an internal 8051 Microcontroller which is
  * responsible for signaling to the external Power Management Unit the
@@ -145,7 +140,6 @@ static void sdv_pci_init(void)
 void __init x86_ce4100_early_setup(void)
 {
 	x86_init.oem.arch_setup = sdv_arch_setup;
-	x86_platform.i8042_detect = ce4100_i8042_detect;
 	x86_init.resources.probe_roms = x86_init_noop;
 	x86_init.mpparse.get_smp_config = x86_init_uint_noop;
 	x86_init.mpparse.find_smp_config = x86_init_noop;
diff --git a/arch/x86/platform/intel-mid/device_libs/Makefile b/arch/x86/platform/intel-mid/device_libs/Makefile
index dd6cfa4ad3ac..61b5ed2b7d40 100644
--- a/arch/x86/platform/intel-mid/device_libs/Makefile
+++ b/arch/x86/platform/intel-mid/device_libs/Makefile
@@ -19,7 +19,7 @@ obj-$(subst m,y,$(CONFIG_SPI_SPIDEV)) += platform_spidev.o
 # I2C Devices
 obj-$(subst m,y,$(CONFIG_SENSORS_EMC1403)) += platform_emc1403.o
 obj-$(subst m,y,$(CONFIG_SENSORS_LIS3LV02D)) += platform_lis331.o
-obj-$(subst m,y,$(CONFIG_INPUT_MPU3050)) += platform_mpu3050.o
+obj-$(subst m,y,$(CONFIG_MPU3050_I2C)) += platform_mpu3050.o
 obj-$(subst m,y,$(CONFIG_INPUT_BMA150)) += platform_bma023.o
 obj-$(subst m,y,$(CONFIG_DRM_MEDFIELD)) += platform_tc35876x.o
 # I2C GPIO Expanders
diff --git a/arch/x86/platform/intel-mid/intel-mid.c b/arch/x86/platform/intel-mid/intel-mid.c
index 7850128f0026..12a272582cdc 100644
--- a/arch/x86/platform/intel-mid/intel-mid.c
+++ b/arch/x86/platform/intel-mid/intel-mid.c
@@ -161,12 +161,6 @@ static void intel_mid_arch_setup(void)
 	regulator_has_full_constraints();
 }
 
-/* MID systems don't have i8042 controller */
-static int intel_mid_i8042_detect(void)
-{
-	return 0;
-}
-
 /*
  * Moorestown does not have external NMI source nor port 0x61 to report
  * NMI status. The possible NMI sources are from pmu as a result of NMI
@@ -197,7 +191,6 @@ void __init x86_intel_mid_early_setup(void)
 	x86_cpuinit.setup_percpu_clockev = apbt_setup_secondary_clock;
 
 	x86_platform.calibrate_tsc = intel_mid_calibrate_tsc;
-	x86_platform.i8042_detect = intel_mid_i8042_detect;
 	x86_init.timers.wallclock_init = intel_mid_rtc_init;
 	x86_platform.get_nmi_reason = intel_mid_get_nmi_reason;
 
diff --git a/arch/x86/platform/intel-quark/imr_selftest.c b/arch/x86/platform/intel-quark/imr_selftest.c
index f5bad40936ac..b8f562049cad 100644
--- a/arch/x86/platform/intel-quark/imr_selftest.c
+++ b/arch/x86/platform/intel-quark/imr_selftest.c
@@ -25,7 +25,8 @@
  * @fmt:	format string.
  * ...		variadic argument list.
  */
-static void __init imr_self_test_result(int res, const char *fmt, ...)
+static __printf(2, 3)
+void __init imr_self_test_result(int res, const char *fmt, ...)
 {
 	va_list vlist;
 
diff --git a/arch/x86/tools/relocs.c b/arch/x86/tools/relocs.c
index 0c2fae8d929d..73eb7fd4aec4 100644
--- a/arch/x86/tools/relocs.c
+++ b/arch/x86/tools/relocs.c
@@ -992,11 +992,12 @@ static void emit_relocs(int as_text, int use_real_mode)
 		die("Segment relocations found but --realmode not specified\n");
 
 	/* Order the relocations for more efficient processing */
-	sort_relocs(&relocs16);
 	sort_relocs(&relocs32);
 #if ELF_BITS == 64
 	sort_relocs(&relocs32neg);
 	sort_relocs(&relocs64);
+#else
+	sort_relocs(&relocs16);
 #endif
 
 	/* Print the relocations */
diff --git a/drivers/input/serio/i8042-x86ia64io.h b/drivers/input/serio/i8042-x86ia64io.h
index 73a4e68448fc..77551f522202 100644
--- a/drivers/input/serio/i8042-x86ia64io.h
+++ b/drivers/input/serio/i8042-x86ia64io.h
@@ -983,7 +983,11 @@ static int __init i8042_pnp_init(void)
 #if defined(__ia64__)
 		return -ENODEV;
 #else
-		pr_info("PNP: No PS/2 controller found. Probing ports directly.\n");
+		pr_info("PNP: No PS/2 controller found.\n");
+		if (x86_platform.legacy.i8042 !=
+				X86_LEGACY_I8042_EXPECTED_PRESENT)
+			return -ENODEV;
+		pr_info("Probing ports directly.\n");
 		return 0;
 #endif
 	}
@@ -1070,8 +1074,8 @@ static int __init i8042_platform_init(void)
 
 #ifdef CONFIG_X86
 	u8 a20_on = 0xdf;
-	/* Just return if pre-detection shows no i8042 controller exist */
-	if (!x86_platform.i8042_detect())
+	/* Just return if platform does not have i8042 controller */
+	if (x86_platform.legacy.i8042 == X86_LEGACY_I8042_PLATFORM_ABSENT)
 		return -ENODEV;
 #endif
 

^ permalink raw reply	[flat|nested] 539+ messages in thread

* [GIT PULL] x86 fixes
@ 2016-12-07 18:53 Ingo Molnar
  0 siblings, 0 replies; 539+ messages in thread
From: Ingo Molnar @ 2016-12-07 18:53 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Thomas Gleixner, H. Peter Anvin, Peter Zijlstra,
	Andrew Morton

Linus,

Please pull the latest x86-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus

   # HEAD: 55f856e640560494518eaf24fe9d2d2089fba71a x86/unwind: Fix guess-unwinder regression

Misc fixes: a core dumping crash fix, a guess-unwinder regression fix, plus three 
build warning fixes.

 Thanks,

	Ingo

------------------>
Borislav Petkov (1):
      x86/platform/olpc: Fix resume handler build warning

Dan Carpenter (1):
      x86/apic/uv: Silence a shift wrapping warning

Dmitry Safonov (1):
      x86/coredump: Always use user_regs_struct for compat_elf_gregset_t

Josh Poimboeuf (1):
      x86/unwind: Fix guess-unwinder regression

Peter Foley (1):
      x86/build: Annotate die() with noreturn to fix build warning on clang


 arch/x86/include/asm/compat.h          | 4 +---
 arch/x86/kernel/apic/x2apic_uv_x.c     | 4 ++--
 arch/x86/kernel/unwind_guess.c         | 9 ++++++---
 arch/x86/platform/olpc/olpc-xo15-sci.c | 2 ++
 arch/x86/tools/relocs.h                | 2 +-
 5 files changed, 12 insertions(+), 9 deletions(-)

diff --git a/arch/x86/include/asm/compat.h b/arch/x86/include/asm/compat.h
index 03d269bed941..24118c0b4640 100644
--- a/arch/x86/include/asm/compat.h
+++ b/arch/x86/include/asm/compat.h
@@ -272,7 +272,6 @@ struct compat_shmid64_ds {
 /*
  * The type of struct elf_prstatus.pr_reg in compatible core dumps.
  */
-#ifdef CONFIG_X86_X32_ABI
 typedef struct user_regs_struct compat_elf_gregset_t;
 
 /* Full regset -- prstatus on x32, otherwise on ia32 */
@@ -281,10 +280,9 @@ typedef struct user_regs_struct compat_elf_gregset_t;
   do { *(int *) (((void *) &((S)->pr_reg)) + R) = (V); } \
   while (0)
 
+#ifdef CONFIG_X86_X32_ABI
 #define COMPAT_USE_64BIT_TIME \
 	(!!(task_pt_regs(current)->orig_ax & __X32_SYSCALL_BIT))
-#else
-typedef struct user_regs_struct32 compat_elf_gregset_t;
 #endif
 
 /*
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index aeef53ce93e1..35690a168cf7 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -815,9 +815,9 @@ static __init void map_mmioh_high_uv3(int index, int min_pnode, int max_pnode)
 				l = li;
 			}
 			addr1 = (base << shift) +
-				f * (unsigned long)(1 << m_io);
+				f * (1ULL << m_io);
 			addr2 = (base << shift) +
-				(l + 1) * (unsigned long)(1 << m_io);
+				(l + 1) * (1ULL << m_io);
 			pr_info("UV: %s[%03d..%03d] NASID 0x%04x ADDR 0x%016lx - 0x%016lx\n",
 				id, fi, li, lnasid, addr1, addr2);
 			if (max_io < l)
diff --git a/arch/x86/kernel/unwind_guess.c b/arch/x86/kernel/unwind_guess.c
index b80e8bf43cc6..22881ddcbb9f 100644
--- a/arch/x86/kernel/unwind_guess.c
+++ b/arch/x86/kernel/unwind_guess.c
@@ -7,11 +7,13 @@
 
 unsigned long unwind_get_return_address(struct unwind_state *state)
 {
-	unsigned long addr = READ_ONCE_NOCHECK(*state->sp);
+	unsigned long addr;
 
 	if (unwind_done(state))
 		return 0;
 
+	addr = READ_ONCE_NOCHECK(*state->sp);
+
 	return ftrace_graph_ret_addr(state->task, &state->graph_idx,
 				     addr, state->sp);
 }
@@ -25,11 +27,12 @@ bool unwind_next_frame(struct unwind_state *state)
 		return false;
 
 	do {
-		unsigned long addr = READ_ONCE_NOCHECK(*state->sp);
+		for (state->sp++; state->sp < info->end; state->sp++) {
+			unsigned long addr = READ_ONCE_NOCHECK(*state->sp);
 
-		for (state->sp++; state->sp < info->end; state->sp++)
 			if (__kernel_text_address(addr))
 				return true;
+		}
 
 		state->sp = info->next_sp;
 
diff --git a/arch/x86/platform/olpc/olpc-xo15-sci.c b/arch/x86/platform/olpc/olpc-xo15-sci.c
index 55130846ac87..c0533fbc39e3 100644
--- a/arch/x86/platform/olpc/olpc-xo15-sci.c
+++ b/arch/x86/platform/olpc/olpc-xo15-sci.c
@@ -196,6 +196,7 @@ static int xo15_sci_remove(struct acpi_device *device)
 	return 0;
 }
 
+#ifdef CONFIG_PM_SLEEP
 static int xo15_sci_resume(struct device *dev)
 {
 	/* Enable all EC events */
@@ -207,6 +208,7 @@ static int xo15_sci_resume(struct device *dev)
 
 	return 0;
 }
+#endif
 
 static SIMPLE_DEV_PM_OPS(xo15_sci_pm, NULL, xo15_sci_resume);
 
diff --git a/arch/x86/tools/relocs.h b/arch/x86/tools/relocs.h
index f59590645b68..1d23bf953a4a 100644
--- a/arch/x86/tools/relocs.h
+++ b/arch/x86/tools/relocs.h
@@ -16,7 +16,7 @@
 #include <regex.h>
 #include <tools/le_byteshift.h>
 
-void die(char *fmt, ...);
+void die(char *fmt, ...) __attribute__((noreturn));
 
 #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
 

^ permalink raw reply	[flat|nested] 539+ messages in thread

* [GIT PULL] x86 fixes
@ 2016-11-22 15:41 Ingo Molnar
  0 siblings, 0 replies; 539+ messages in thread
From: Ingo Molnar @ 2016-11-22 15:41 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Thomas Gleixner, H. Peter Anvin, Peter Zijlstra,
	Andrew Morton

Linus,

Please pull the latest x86-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus

   # HEAD: e5dce2868818ca8706924f7bdc7939d481eefab0 x86/platform/intel-mid: Rename platform_wdt to platform_mrfld_wdt

Misc fixes:

 - two fixes to make (very) old Intel CPUs boot reliably
 - fix the intel-mid driver and rename it
 - two KASAN false positive fixes
 - an FPU fix
 - two sysfb fixes
 - two build fixes related to new toolchain versions

 Thanks,

	Ingo

------------------>
Andy Lutomirski (2):
      x86/traps: Ignore high word of regs->cs in early_fixup_exception()
      x86/boot: Fail the boot if !M486 and CPUID is missing

Andy Shevchenko (2):
      x86/platform/intel-mid: Register watchdog device after SCU
      x86/platform/intel-mid: Rename platform_wdt to platform_mrfld_wdt

Arnd Bergmann (1):
      x86/boot: Avoid warning for zero-filling .bss

David Herrmann (2):
      x86/sysfb: Add support for 64bit EFI lfb_base
      x86/sysfb: Fix lfb_size calculation

H.J. Lu (1):
      x86/build: Build compressed x86 kernels as PIE when !CONFIG_RELOCATABLE as well

Josh Poimboeuf (2):
      x86/unwind: Prevent KASAN false positive warnings in guess unwinder
      x86/dumpstack: Prevent KASAN false positive warnings

Yu-cheng Yu (1):
      x86/fpu: Fix invalid FPU ptrace state after execve()


 arch/x86/boot/compressed/Makefile                  |  5 ++-
 arch/x86/boot/cpu.c                                |  6 ++++
 arch/x86/kernel/dumpstack.c                        |  2 +-
 arch/x86/kernel/fpu/core.c                         | 16 ++++-----
 arch/x86/kernel/head_32.S                          |  9 +++--
 arch/x86/kernel/sysfb_simplefb.c                   | 39 +++++++++++++++++-----
 arch/x86/kernel/unwind_guess.c                     |  8 +++--
 arch/x86/mm/extable.c                              |  7 +++-
 arch/x86/platform/intel-mid/device_libs/Makefile   |  2 +-
 .../{platform_wdt.c => platform_mrfld_wdt.c}       | 34 +++++++++++++++----
 10 files changed, 95 insertions(+), 33 deletions(-)
 rename arch/x86/platform/intel-mid/device_libs/{platform_wdt.c => platform_mrfld_wdt.c} (65%)

diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index 536ccfcc01c6..34d9e15857c3 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -40,8 +40,8 @@ GCOV_PROFILE := n
 UBSAN_SANITIZE :=n
 
 LDFLAGS := -m elf_$(UTS_MACHINE)
-ifeq ($(CONFIG_RELOCATABLE),y)
-# If kernel is relocatable, build compressed kernel as PIE.
+# Compressed kernel should be built as PIE since it may be loaded at any
+# address by the bootloader.
 ifeq ($(CONFIG_X86_32),y)
 LDFLAGS += $(call ld-option, -pie) $(call ld-option, --no-dynamic-linker)
 else
@@ -51,7 +51,6 @@ else
 LDFLAGS += $(shell $(LD) --help 2>&1 | grep -q "\-z noreloc-overflow" \
 	&& echo "-z noreloc-overflow -pie --no-dynamic-linker")
 endif
-endif
 LDFLAGS_vmlinux := -T
 
 hostprogs-y	:= mkpiggy
diff --git a/arch/x86/boot/cpu.c b/arch/x86/boot/cpu.c
index 26240dde081e..4224ede43b4e 100644
--- a/arch/x86/boot/cpu.c
+++ b/arch/x86/boot/cpu.c
@@ -87,6 +87,12 @@ int validate_cpu(void)
 		return -1;
 	}
 
+	if (CONFIG_X86_MINIMUM_CPU_FAMILY <= 4 && !IS_ENABLED(CONFIG_M486) &&
+	    !has_eflag(X86_EFLAGS_ID)) {
+		printf("This kernel requires a CPU with the CPUID instruction.  Build with CONFIG_M486=y to run on this CPU.\n");
+		return -1;
+	}
+
 	if (err_flags) {
 		puts("This kernel requires the following features "
 		     "not present on the CPU:\n");
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index 9b7cf5c28f5f..85f854b98a9d 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -112,7 +112,7 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
 		for (; stack < stack_info.end; stack++) {
 			unsigned long real_addr;
 			int reliable = 0;
-			unsigned long addr = *stack;
+			unsigned long addr = READ_ONCE_NOCHECK(*stack);
 			unsigned long *ret_addr_p =
 				unwind_get_return_address_ptr(&state);
 
diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index 47004010ad5d..ebb4e95fbd74 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -521,14 +521,14 @@ void fpu__clear(struct fpu *fpu)
 {
 	WARN_ON_FPU(fpu != &current->thread.fpu); /* Almost certainly an anomaly */
 
-	if (!use_eager_fpu() || !static_cpu_has(X86_FEATURE_FPU)) {
-		/* FPU state will be reallocated lazily at the first use. */
-		fpu__drop(fpu);
-	} else {
-		if (!fpu->fpstate_active) {
-			fpu__activate_curr(fpu);
-			user_fpu_begin();
-		}
+	fpu__drop(fpu);
+
+	/*
+	 * Make sure fpstate is cleared and initialized.
+	 */
+	if (static_cpu_has(X86_FEATURE_FPU)) {
+		fpu__activate_curr(fpu);
+		user_fpu_begin();
 		copy_init_fpstate_to_fpregs();
 	}
 }
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index b6b2f0264af3..2dabea46f039 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -665,14 +665,17 @@ __PAGE_ALIGNED_BSS
 initial_pg_pmd:
 	.fill 1024*KPMDS,4,0
 #else
-ENTRY(initial_page_table)
+.globl initial_page_table
+initial_page_table:
 	.fill 1024,4,0
 #endif
 initial_pg_fixmap:
 	.fill 1024,4,0
-ENTRY(empty_zero_page)
+.globl empty_zero_page
+empty_zero_page:
 	.fill 4096,1,0
-ENTRY(swapper_pg_dir)
+.globl swapper_pg_dir
+swapper_pg_dir:
 	.fill 1024,4,0
 EXPORT_SYMBOL(empty_zero_page)
 
diff --git a/arch/x86/kernel/sysfb_simplefb.c b/arch/x86/kernel/sysfb_simplefb.c
index 764a29f84de7..85195d447a92 100644
--- a/arch/x86/kernel/sysfb_simplefb.c
+++ b/arch/x86/kernel/sysfb_simplefb.c
@@ -66,13 +66,36 @@ __init int create_simplefb(const struct screen_info *si,
 {
 	struct platform_device *pd;
 	struct resource res;
-	unsigned long len;
+	u64 base, size;
+	u32 length;
 
-	/* don't use lfb_size as it may contain the whole VMEM instead of only
-	 * the part that is occupied by the framebuffer */
-	len = mode->height * mode->stride;
-	len = PAGE_ALIGN(len);
-	if (len > (u64)si->lfb_size << 16) {
+	/*
+	 * If the 64BIT_BASE capability is set, ext_lfb_base will contain the
+	 * upper half of the base address. Assemble the address, then make sure
+	 * it is valid and we can actually access it.
+	 */
+	base = si->lfb_base;
+	if (si->capabilities & VIDEO_CAPABILITY_64BIT_BASE)
+		base |= (u64)si->ext_lfb_base << 32;
+	if (!base || (u64)(resource_size_t)base != base) {
+		printk(KERN_DEBUG "sysfb: inaccessible VRAM base\n");
+		return -EINVAL;
+	}
+
+	/*
+	 * Don't use lfb_size as IORESOURCE size, since it may contain the
+	 * entire VMEM, and thus require huge mappings. Use just the part we
+	 * need, that is, the part where the framebuffer is located. But verify
+	 * that it does not exceed the advertised VMEM.
+	 * Note that in case of VBE, the lfb_size is shifted by 16 bits for
+	 * historical reasons.
+	 */
+	size = si->lfb_size;
+	if (si->orig_video_isVGA == VIDEO_TYPE_VLFB)
+		size <<= 16;
+	length = mode->height * mode->stride;
+	length = PAGE_ALIGN(length);
+	if (length > size) {
 		printk(KERN_WARNING "sysfb: VRAM smaller than advertised\n");
 		return -EINVAL;
 	}
@@ -81,8 +104,8 @@ __init int create_simplefb(const struct screen_info *si,
 	memset(&res, 0, sizeof(res));
 	res.flags = IORESOURCE_MEM | IORESOURCE_BUSY;
 	res.name = simplefb_resname;
-	res.start = si->lfb_base;
-	res.end = si->lfb_base + len - 1;
+	res.start = base;
+	res.end = res.start + length - 1;
 	if (res.end <= res.start)
 		return -EINVAL;
 
diff --git a/arch/x86/kernel/unwind_guess.c b/arch/x86/kernel/unwind_guess.c
index 2d721e533cf4..b80e8bf43cc6 100644
--- a/arch/x86/kernel/unwind_guess.c
+++ b/arch/x86/kernel/unwind_guess.c
@@ -7,11 +7,13 @@
 
 unsigned long unwind_get_return_address(struct unwind_state *state)
 {
+	unsigned long addr = READ_ONCE_NOCHECK(*state->sp);
+
 	if (unwind_done(state))
 		return 0;
 
 	return ftrace_graph_ret_addr(state->task, &state->graph_idx,
-				     *state->sp, state->sp);
+				     addr, state->sp);
 }
 EXPORT_SYMBOL_GPL(unwind_get_return_address);
 
@@ -23,8 +25,10 @@ bool unwind_next_frame(struct unwind_state *state)
 		return false;
 
 	do {
+		unsigned long addr = READ_ONCE_NOCHECK(*state->sp);
+
 		for (state->sp++; state->sp < info->end; state->sp++)
-			if (__kernel_text_address(*state->sp))
+			if (__kernel_text_address(addr))
 				return true;
 
 		state->sp = info->next_sp;
diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c
index 79ae939970d3..fcd06f7526de 100644
--- a/arch/x86/mm/extable.c
+++ b/arch/x86/mm/extable.c
@@ -135,7 +135,12 @@ void __init early_fixup_exception(struct pt_regs *regs, int trapnr)
 	if (early_recursion_flag > 2)
 		goto halt_loop;
 
-	if (regs->cs != __KERNEL_CS)
+	/*
+	 * Old CPUs leave the high bits of CS on the stack
+	 * undefined.  I'm not sure which CPUs do this, but at least
+	 * the 486 DX works this way.
+	 */
+	if ((regs->cs & 0xFFFF) != __KERNEL_CS)
 		goto fail;
 
 	/*
diff --git a/arch/x86/platform/intel-mid/device_libs/Makefile b/arch/x86/platform/intel-mid/device_libs/Makefile
index 429d08be7848..dd6cfa4ad3ac 100644
--- a/arch/x86/platform/intel-mid/device_libs/Makefile
+++ b/arch/x86/platform/intel-mid/device_libs/Makefile
@@ -28,4 +28,4 @@ obj-$(subst m,y,$(CONFIG_GPIO_PCA953X)) += platform_pcal9555a.o
 obj-$(subst m,y,$(CONFIG_GPIO_PCA953X)) += platform_tca6416.o
 # MISC Devices
 obj-$(subst m,y,$(CONFIG_KEYBOARD_GPIO)) += platform_gpio_keys.o
-obj-$(subst m,y,$(CONFIG_INTEL_MID_WATCHDOG)) += platform_wdt.o
+obj-$(subst m,y,$(CONFIG_INTEL_MID_WATCHDOG)) += platform_mrfld_wdt.o
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_wdt.c b/arch/x86/platform/intel-mid/device_libs/platform_mrfld_wdt.c
similarity index 65%
rename from arch/x86/platform/intel-mid/device_libs/platform_wdt.c
rename to arch/x86/platform/intel-mid/device_libs/platform_mrfld_wdt.c
index de734134bc8d..3f1f1c77d090 100644
--- a/arch/x86/platform/intel-mid/device_libs/platform_wdt.c
+++ b/arch/x86/platform/intel-mid/device_libs/platform_mrfld_wdt.c
@@ -1,5 +1,5 @@
 /*
- * platform_wdt.c: Watchdog platform library file
+ * Intel Merrifield watchdog platform device library file
  *
  * (C) Copyright 2014 Intel Corporation
  * Author: David Cohen <david.a.cohen@linux.intel.com>
@@ -14,7 +14,9 @@
 #include <linux/interrupt.h>
 #include <linux/platform_device.h>
 #include <linux/platform_data/intel-mid_wdt.h>
+
 #include <asm/intel-mid.h>
+#include <asm/intel_scu_ipc.h>
 #include <asm/io_apic.h>
 
 #define TANGIER_EXT_TIMER0_MSI 15
@@ -50,14 +52,34 @@ static struct intel_mid_wdt_pdata tangier_pdata = {
 	.probe = tangier_probe,
 };
 
-static int __init register_mid_wdt(void)
+static int wdt_scu_status_change(struct notifier_block *nb,
+				 unsigned long code, void *data)
 {
-	if (intel_mid_identify_cpu() == INTEL_MID_CPU_CHIP_TANGIER) {
-		wdt_dev.dev.platform_data = &tangier_pdata;
-		return platform_device_register(&wdt_dev);
+	if (code == SCU_DOWN) {
+		platform_device_unregister(&wdt_dev);
+		return 0;
 	}
 
-	return -ENODEV;
+	return platform_device_register(&wdt_dev);
 }
 
+static struct notifier_block wdt_scu_notifier = {
+	.notifier_call	= wdt_scu_status_change,
+};
+
+static int __init register_mid_wdt(void)
+{
+	if (intel_mid_identify_cpu() != INTEL_MID_CPU_CHIP_TANGIER)
+		return -ENODEV;
+
+	wdt_dev.dev.platform_data = &tangier_pdata;
+
+	/*
+	 * We need to be sure that the SCU IPC is ready before watchdog device
+	 * can be registered:
+	 */
+	intel_scu_notifier_add(&wdt_scu_notifier);
+
+	return 0;
+}
 rootfs_initcall(register_mid_wdt);

^ permalink raw reply	[flat|nested] 539+ messages in thread

* [GIT PULL] x86 fixes
@ 2016-11-14  8:03 Ingo Molnar
  0 siblings, 0 replies; 539+ messages in thread
From: Ingo Molnar @ 2016-11-14  8:03 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Thomas Gleixner, H. Peter Anvin, Borislav Petkov,
	Peter Zijlstra, Andrew Morton

Linus,

Please pull the latest x86-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus

   # HEAD: d49597fd3bc7d9534de55e9256767f073be1b33a x86/cpu: Deal with broken firmware (VMWare/XEN)

Misc fixes:

 - fix an Intel/MID boot crash/hang bug
 - fix a cache topology mis-parsing bug on certain AMD CPUs
 - fix a virtualization firmware bug by adding a check+quirk workaround on the kernel side

  out-of-topic modifications in x86-urgent-for-linus:
  -----------------------------------------------------
  drivers/pci/pci-mid.c              # e8a6123e9ead: x86/platform/intel-mid: Retr

 Thanks,

	Ingo

------------------>
Lukas Wunner (1):
      x86/platform/intel-mid: Retrofit pci_platform_pm_ops ->get_state hook

Thomas Gleixner (1):
      x86/cpu: Deal with broken firmware (VMWare/XEN)

Yazen Ghannam (1):
      x86/cpu/AMD: Fix cpu_llc_id for AMD Fam17h systems


 arch/x86/include/asm/intel-mid.h  |  1 +
 arch/x86/kernel/cpu/amd.c         |  6 +-----
 arch/x86/kernel/cpu/common.c      | 32 ++++++++++++++++++++++++++++++--
 arch/x86/platform/intel-mid/pwr.c | 19 +++++++++++++++++++
 drivers/pci/pci-mid.c             |  6 ++++++
 5 files changed, 57 insertions(+), 7 deletions(-)

diff --git a/arch/x86/include/asm/intel-mid.h b/arch/x86/include/asm/intel-mid.h
index 5b6753d1f7f4..49da9f497b90 100644
--- a/arch/x86/include/asm/intel-mid.h
+++ b/arch/x86/include/asm/intel-mid.h
@@ -17,6 +17,7 @@
 
 extern int intel_mid_pci_init(void);
 extern int intel_mid_pci_set_power_state(struct pci_dev *pdev, pci_power_t state);
+extern pci_power_t intel_mid_pci_get_power_state(struct pci_dev *pdev);
 
 extern void intel_mid_pwr_power_off(void);
 
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index b81fe2d63e15..1e81a37c034e 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -347,7 +347,6 @@ static void amd_detect_cmp(struct cpuinfo_x86 *c)
 #ifdef CONFIG_SMP
 	unsigned bits;
 	int cpu = smp_processor_id();
-	unsigned int socket_id, core_complex_id;
 
 	bits = c->x86_coreid_bits;
 	/* Low order bits define the core id (index of core in socket) */
@@ -365,10 +364,7 @@ static void amd_detect_cmp(struct cpuinfo_x86 *c)
 	 if (c->x86 != 0x17 || !cpuid_edx(0x80000006))
 		return;
 
-	socket_id	= (c->apicid >> bits) - 1;
-	core_complex_id	= (c->apicid & ((1 << bits) - 1)) >> 3;
-
-	per_cpu(cpu_llc_id, cpu) = (socket_id << 3) | core_complex_id;
+	per_cpu(cpu_llc_id, cpu) = c->apicid >> 3;
 #endif
 }
 
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 9bd910a7dd0a..cc9e980c68ec 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -979,6 +979,35 @@ static void x86_init_cache_qos(struct cpuinfo_x86 *c)
 }
 
 /*
+ * The physical to logical package id mapping is initialized from the
+ * acpi/mptables information. Make sure that CPUID actually agrees with
+ * that.
+ */
+static void sanitize_package_id(struct cpuinfo_x86 *c)
+{
+#ifdef CONFIG_SMP
+	unsigned int pkg, apicid, cpu = smp_processor_id();
+
+	apicid = apic->cpu_present_to_apicid(cpu);
+	pkg = apicid >> boot_cpu_data.x86_coreid_bits;
+
+	if (apicid != c->initial_apicid) {
+		pr_err(FW_BUG "CPU%u: APIC id mismatch. Firmware: %x CPUID: %x\n",
+		       cpu, apicid, c->initial_apicid);
+		c->initial_apicid = apicid;
+	}
+	if (pkg != c->phys_proc_id) {
+		pr_err(FW_BUG "CPU%u: Using firmware package id %u instead of %u\n",
+		       cpu, pkg, c->phys_proc_id);
+		c->phys_proc_id = pkg;
+	}
+	c->logical_proc_id = topology_phys_to_logical_pkg(pkg);
+#else
+	c->logical_proc_id = 0;
+#endif
+}
+
+/*
  * This does the hard work of actually picking apart the CPU stuff...
  */
 static void identify_cpu(struct cpuinfo_x86 *c)
@@ -1103,8 +1132,7 @@ static void identify_cpu(struct cpuinfo_x86 *c)
 #ifdef CONFIG_NUMA
 	numa_add_cpu(smp_processor_id());
 #endif
-	/* The boot/hotplug time assigment got cleared, restore it */
-	c->logical_proc_id = topology_phys_to_logical_pkg(c->phys_proc_id);
+	sanitize_package_id(c);
 }
 
 /*
diff --git a/arch/x86/platform/intel-mid/pwr.c b/arch/x86/platform/intel-mid/pwr.c
index 5d3b45ad1c03..67375dda451c 100644
--- a/arch/x86/platform/intel-mid/pwr.c
+++ b/arch/x86/platform/intel-mid/pwr.c
@@ -272,6 +272,25 @@ int intel_mid_pci_set_power_state(struct pci_dev *pdev, pci_power_t state)
 }
 EXPORT_SYMBOL_GPL(intel_mid_pci_set_power_state);
 
+pci_power_t intel_mid_pci_get_power_state(struct pci_dev *pdev)
+{
+	struct mid_pwr *pwr = midpwr;
+	int id, reg, bit;
+	u32 power;
+
+	if (!pwr || !pwr->available)
+		return PCI_UNKNOWN;
+
+	id = intel_mid_pwr_get_lss_id(pdev);
+	if (id < 0)
+		return PCI_UNKNOWN;
+
+	reg = (id * LSS_PWS_BITS) / 32;
+	bit = (id * LSS_PWS_BITS) % 32;
+	power = mid_pwr_get_state(pwr, reg);
+	return (__force pci_power_t)((power >> bit) & 3);
+}
+
 void intel_mid_pwr_power_off(void)
 {
 	struct mid_pwr *pwr = midpwr;
diff --git a/drivers/pci/pci-mid.c b/drivers/pci/pci-mid.c
index 55f453de562e..c7f3408e3148 100644
--- a/drivers/pci/pci-mid.c
+++ b/drivers/pci/pci-mid.c
@@ -29,6 +29,11 @@ static int mid_pci_set_power_state(struct pci_dev *pdev, pci_power_t state)
 	return intel_mid_pci_set_power_state(pdev, state);
 }
 
+static pci_power_t mid_pci_get_power_state(struct pci_dev *pdev)
+{
+	return intel_mid_pci_get_power_state(pdev);
+}
+
 static pci_power_t mid_pci_choose_state(struct pci_dev *pdev)
 {
 	return PCI_D3hot;
@@ -52,6 +57,7 @@ static bool mid_pci_need_resume(struct pci_dev *dev)
 static struct pci_platform_pm_ops mid_pci_platform_pm = {
 	.is_manageable	= mid_pci_power_manageable,
 	.set_state	= mid_pci_set_power_state,
+	.get_state	= mid_pci_get_power_state,
 	.choose_state	= mid_pci_choose_state,
 	.sleep_wake	= mid_pci_sleep_wake,
 	.run_wake	= mid_pci_run_wake,

^ permalink raw reply	[flat|nested] 539+ messages in thread

* [GIT PULL] x86 fixes
@ 2016-10-28  8:41 Ingo Molnar
  0 siblings, 0 replies; 539+ messages in thread
From: Ingo Molnar @ 2016-10-28  8:41 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Thomas Gleixner, H. Peter Anvin, Peter Zijlstra,
	Andrew Morton, Borislav Petkov

Linus,

Please pull the latest x86-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus

   # HEAD: 1c27f646b18fb56308dff82784ca61951bad0b48 x86/microcode/AMD: Fix more fallout from CONFIG_RANDOMIZE_MEMORY=y

Misc fixes: three build fixes, an unwinder fix and a microcode loader fix.

 Thanks,

	Ingo

------------------>
Arnd Bergmann (1):
      x86/quirks: Hide maybe-uninitialized warning

Borislav Petkov (1):
      x86/microcode/AMD: Fix more fallout from CONFIG_RANDOMIZE_MEMORY=y

Jan Beulich (1):
      x86/build: Fix build with older GCC versions

Josh Poimboeuf (1):
      x86/unwind: Fix empty stack dereference in guess unwinder

Steven Rostedt (1):
      x86: Fix export for mcount and __fentry__


 arch/x86/entry/Makefile             | 4 ++--
 arch/x86/kernel/cpu/microcode/amd.c | 2 +-
 arch/x86/kernel/mcount_64.S         | 3 ++-
 arch/x86/kernel/quirks.c            | 3 +--
 arch/x86/kernel/unwind_guess.c      | 9 ++++++++-
 5 files changed, 14 insertions(+), 7 deletions(-)

diff --git a/arch/x86/entry/Makefile b/arch/x86/entry/Makefile
index 77f28ce9c646..9976fcecd17e 100644
--- a/arch/x86/entry/Makefile
+++ b/arch/x86/entry/Makefile
@@ -5,8 +5,8 @@
 OBJECT_FILES_NON_STANDARD_entry_$(BITS).o   := y
 OBJECT_FILES_NON_STANDARD_entry_64_compat.o := y
 
-CFLAGS_syscall_64.o		+= -Wno-override-init
-CFLAGS_syscall_32.o		+= -Wno-override-init
+CFLAGS_syscall_64.o		+= $(call cc-option,-Wno-override-init,)
+CFLAGS_syscall_32.o		+= $(call cc-option,-Wno-override-init,)
 obj-y				:= entry_$(BITS).o thunk_$(BITS).o syscall_$(BITS).o
 obj-y				+= common.o
 
diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c
index 620ab06bcf45..017bda12caae 100644
--- a/arch/x86/kernel/cpu/microcode/amd.c
+++ b/arch/x86/kernel/cpu/microcode/amd.c
@@ -429,7 +429,7 @@ int __init save_microcode_in_initrd_amd(void)
 	 * We need the physical address of the container for both bitness since
 	 * boot_params.hdr.ramdisk_image is a physical address.
 	 */
-	cont    = __pa(container);
+	cont    = __pa_nodebug(container);
 	cont_va = container;
 #endif
 
diff --git a/arch/x86/kernel/mcount_64.S b/arch/x86/kernel/mcount_64.S
index efe73aacf966..7b0d3da52fb4 100644
--- a/arch/x86/kernel/mcount_64.S
+++ b/arch/x86/kernel/mcount_64.S
@@ -18,8 +18,10 @@
 
 #ifdef CC_USING_FENTRY
 # define function_hook	__fentry__
+EXPORT_SYMBOL(__fentry__)
 #else
 # define function_hook	mcount
+EXPORT_SYMBOL(mcount)
 #endif
 
 /* All cases save the original rbp (8 bytes) */
@@ -295,7 +297,6 @@ GLOBAL(ftrace_stub)
 	jmp fgraph_trace
 END(function_hook)
 #endif /* CONFIG_DYNAMIC_FTRACE */
-EXPORT_SYMBOL(function_hook)
 #endif /* CONFIG_FUNCTION_TRACER */
 
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c
index 51402a7e4ca6..0bee04d41bed 100644
--- a/arch/x86/kernel/quirks.c
+++ b/arch/x86/kernel/quirks.c
@@ -625,8 +625,6 @@ static void amd_disable_seq_and_redirect_scrub(struct pci_dev *dev)
 DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_NB_F3,
 			amd_disable_seq_and_redirect_scrub);
 
-#endif
-
 #if defined(CONFIG_X86_64) && defined(CONFIG_X86_MCE)
 #include <linux/jump_label.h>
 #include <asm/string_64.h>
@@ -657,3 +655,4 @@ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x2fc0, quirk_intel_brickland_xeon_
 DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x6fc0, quirk_intel_brickland_xeon_ras_cap);
 DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x2083, quirk_intel_purley_xeon_ras_cap);
 #endif
+#endif
diff --git a/arch/x86/kernel/unwind_guess.c b/arch/x86/kernel/unwind_guess.c
index 9298993dc8b7..2d721e533cf4 100644
--- a/arch/x86/kernel/unwind_guess.c
+++ b/arch/x86/kernel/unwind_guess.c
@@ -47,7 +47,14 @@ void __unwind_start(struct unwind_state *state, struct task_struct *task,
 	get_stack_info(first_frame, state->task, &state->stack_info,
 		       &state->stack_mask);
 
-	if (!__kernel_text_address(*first_frame))
+	/*
+	 * The caller can provide the address of the first frame directly
+	 * (first_frame) or indirectly (regs->sp) to indicate which stack frame
+	 * to start unwinding at.  Skip ahead until we reach it.
+	 */
+	if (!unwind_done(state) &&
+	    (!on_stack(&state->stack_info, first_frame, sizeof(long)) ||
+	    !__kernel_text_address(*first_frame)))
 		unwind_next_frame(state);
 }
 EXPORT_SYMBOL_GPL(__unwind_start);

^ permalink raw reply	[flat|nested] 539+ messages in thread

* [GIT PULL] x86 fixes
@ 2016-10-22 11:16 Ingo Molnar
  0 siblings, 0 replies; 539+ messages in thread
From: Ingo Molnar @ 2016-10-22 11:16 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Thomas Gleixner, H. Peter Anvin, Peter Zijlstra,
	Andrew Morton

Linus,

Please pull the latest x86-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus

   # HEAD: ff8560512b8d4b7ca3ef4fd69166634ac30b2525 x86/boot/smp: Don't try to poke disabled/non-existent APIC

Three fixes, a hw-enablement and a cross-arch fix/enablement change:

 - SGI/UV fix for older platforms

 - x32 signal handling fix

 - older x86 platform bootup APIC fix

 - AVX512-4VNNIW (Neural Network Instructions) and AVX512-4FMAPS (Multiply 
   Accumulation Single precision instructions) enablement.

 - move thread_info back into x86 specific code, to make life easier for other 
   architectures trying to make use of CONFIG_THREAD_INFO_IN_TASK_STRUCT=y.

  out-of-topic modifications in x86-urgent-for-linus:
  -----------------------------------------------------
  include/linux/thread_info.h        # c8061485a0d7: sched/core, x86: Make struct

 Thanks,

	Ingo

------------------>
Alex Thorlton (1):
      x86/platform/UV: Fix support for EFI_OLD_MEMMAP after BIOS callback updates

Dmitry Safonov (1):
      x86/signal: Remove bogus user_64bit_mode() check from sigaction_compat_abi()

Heiko Carstens (1):
      sched/core, x86: Make struct thread_info arch specific again

Piotr Luc (1):
      x86/cpufeature: Add AVX512_4VNNIW and AVX512_4FMAPS features

Renat Valiullin (1):
      x86/vmware: Skip timer_irq_works() check on VMware

Ville Syrjälä (1):
      x86/boot/smp: Don't try to poke disabled/non-existent APIC


 arch/x86/include/asm/cpufeatures.h       |  2 ++
 arch/x86/include/asm/thread_info.h       |  9 +++++++++
 arch/x86/kernel/cpu/scattered.c          |  2 ++
 arch/x86/kernel/cpu/vmware.c             |  5 +++++
 arch/x86/kernel/fpu/xstate.c             |  2 ++
 arch/x86/kernel/signal_compat.c          |  3 ---
 arch/x86/kernel/smpboot.c                | 16 +++++++++-------
 arch/x86/platform/uv/bios_uv.c           | 10 +++++++++-
 include/linux/thread_info.h              | 11 -----------
 tools/arch/x86/include/asm/cpufeatures.h |  2 ++
 10 files changed, 40 insertions(+), 22 deletions(-)

diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 1188bc849ee3..a39629206864 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -194,6 +194,8 @@
 #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
 
 #define X86_FEATURE_INTEL_PT	( 7*32+15) /* Intel Processor Trace */
+#define X86_FEATURE_AVX512_4VNNIW (7*32+16) /* AVX-512 Neural Network Instructions */
+#define X86_FEATURE_AVX512_4FMAPS (7*32+17) /* AVX-512 Multiply Accumulation Single precision */
 
 /* Virtualization flags: Linux defined, word 8 */
 #define X86_FEATURE_TPR_SHADOW  ( 8*32+ 0) /* Intel TPR Shadow */
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index 2aaca53c0974..ad6f5eb07a95 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -52,6 +52,15 @@ struct task_struct;
 #include <asm/cpufeature.h>
 #include <linux/atomic.h>
 
+struct thread_info {
+	unsigned long		flags;		/* low level flags */
+};
+
+#define INIT_THREAD_INFO(tsk)			\
+{						\
+	.flags		= 0,			\
+}
+
 #define init_stack		(init_thread_union.stack)
 
 #else /* !__ASSEMBLY__ */
diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
index 8cb57df9398d..1db8dc490b66 100644
--- a/arch/x86/kernel/cpu/scattered.c
+++ b/arch/x86/kernel/cpu/scattered.c
@@ -32,6 +32,8 @@ void init_scattered_cpuid_features(struct cpuinfo_x86 *c)
 
 	static const struct cpuid_bit cpuid_bits[] = {
 		{ X86_FEATURE_INTEL_PT,		CR_EBX,25, 0x00000007, 0 },
+		{ X86_FEATURE_AVX512_4VNNIW,	CR_EDX, 2, 0x00000007, 0 },
+		{ X86_FEATURE_AVX512_4FMAPS,	CR_EDX, 3, 0x00000007, 0 },
 		{ X86_FEATURE_APERFMPERF,	CR_ECX, 0, 0x00000006, 0 },
 		{ X86_FEATURE_EPB,		CR_ECX, 3, 0x00000006, 0 },
 		{ X86_FEATURE_HW_PSTATE,	CR_EDX, 7, 0x80000007, 0 },
diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c
index 81160578b91a..5130985b758b 100644
--- a/arch/x86/kernel/cpu/vmware.c
+++ b/arch/x86/kernel/cpu/vmware.c
@@ -27,6 +27,7 @@
 #include <asm/div64.h>
 #include <asm/x86_init.h>
 #include <asm/hypervisor.h>
+#include <asm/timer.h>
 #include <asm/apic.h>
 
 #define CPUID_VMWARE_INFO_LEAF	0x40000000
@@ -94,6 +95,10 @@ static void __init vmware_platform_setup(void)
 	} else {
 		pr_warn("Failed to get TSC freq from the hypervisor\n");
 	}
+
+#ifdef CONFIG_X86_IO_APIC
+	no_timer_check = 1;
+#endif
 }
 
 /*
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index 124aa5c593f8..095ef7ddd6ae 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -74,6 +74,8 @@ void fpu__xstate_clear_all_cpu_caps(void)
 	setup_clear_cpu_cap(X86_FEATURE_MPX);
 	setup_clear_cpu_cap(X86_FEATURE_XGETBV1);
 	setup_clear_cpu_cap(X86_FEATURE_PKU);
+	setup_clear_cpu_cap(X86_FEATURE_AVX512_4VNNIW);
+	setup_clear_cpu_cap(X86_FEATURE_AVX512_4FMAPS);
 }
 
 /*
diff --git a/arch/x86/kernel/signal_compat.c b/arch/x86/kernel/signal_compat.c
index 40df33753bae..ec1f756f9dc9 100644
--- a/arch/x86/kernel/signal_compat.c
+++ b/arch/x86/kernel/signal_compat.c
@@ -105,9 +105,6 @@ void sigaction_compat_abi(struct k_sigaction *act, struct k_sigaction *oact)
 	/* Don't let flags to be set from userspace */
 	act->sa.sa_flags &= ~(SA_IA32_ABI | SA_X32_ABI);
 
-	if (user_64bit_mode(current_pt_regs()))
-		return;
-
 	if (in_ia32_syscall())
 		act->sa.sa_flags |= SA_IA32_ABI;
 	if (in_x32_syscall())
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 951f093a96fe..42f5eb7b4f6c 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -1409,15 +1409,17 @@ __init void prefill_possible_map(void)
 
 	/* No boot processor was found in mptable or ACPI MADT */
 	if (!num_processors) {
-		int apicid = boot_cpu_physical_apicid;
-		int cpu = hard_smp_processor_id();
+		if (boot_cpu_has(X86_FEATURE_APIC)) {
+			int apicid = boot_cpu_physical_apicid;
+			int cpu = hard_smp_processor_id();
 
-		pr_warn("Boot CPU (id %d) not listed by BIOS\n", cpu);
+			pr_warn("Boot CPU (id %d) not listed by BIOS\n", cpu);
 
-		/* Make sure boot cpu is enumerated */
-		if (apic->cpu_present_to_apicid(0) == BAD_APICID &&
-		    apic->apic_id_valid(apicid))
-			generic_processor_info(apicid, boot_cpu_apic_version);
+			/* Make sure boot cpu is enumerated */
+			if (apic->cpu_present_to_apicid(0) == BAD_APICID &&
+			    apic->apic_id_valid(apicid))
+				generic_processor_info(apicid, boot_cpu_apic_version);
+		}
 
 		if (!num_processors)
 			num_processors = 1;
diff --git a/arch/x86/platform/uv/bios_uv.c b/arch/x86/platform/uv/bios_uv.c
index b4d5e95fe4df..4a6a5a26c582 100644
--- a/arch/x86/platform/uv/bios_uv.c
+++ b/arch/x86/platform/uv/bios_uv.c
@@ -40,7 +40,15 @@ s64 uv_bios_call(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, u64 a4, u64 a5)
 		 */
 		return BIOS_STATUS_UNIMPLEMENTED;
 
-	ret = efi_call_virt_pointer(tab, function, (u64)which, a1, a2, a3, a4, a5);
+	/*
+	 * If EFI_OLD_MEMMAP is set, we need to fall back to using our old EFI
+	 * callback method, which uses efi_call() directly, with the kernel page tables:
+	 */
+	if (unlikely(test_bit(EFI_OLD_MEMMAP, &efi.flags)))
+		ret = efi_call((void *)__va(tab->function), (u64)which, a1, a2, a3, a4, a5);
+	else
+		ret = efi_call_virt_pointer(tab, function, (u64)which, a1, a2, a3, a4, a5);
+
 	return ret;
 }
 EXPORT_SYMBOL_GPL(uv_bios_call);
diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h
index 45f004e9cc59..2873baf5372a 100644
--- a/include/linux/thread_info.h
+++ b/include/linux/thread_info.h
@@ -14,17 +14,6 @@ struct timespec;
 struct compat_timespec;
 
 #ifdef CONFIG_THREAD_INFO_IN_TASK
-struct thread_info {
-	unsigned long		flags;		/* low level flags */
-};
-
-#define INIT_THREAD_INFO(tsk)			\
-{						\
-	.flags		= 0,			\
-}
-#endif
-
-#ifdef CONFIG_THREAD_INFO_IN_TASK
 #define current_thread_info() ((struct thread_info *)current)
 #endif
 
diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h
index 1188bc849ee3..a39629206864 100644
--- a/tools/arch/x86/include/asm/cpufeatures.h
+++ b/tools/arch/x86/include/asm/cpufeatures.h
@@ -194,6 +194,8 @@
 #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
 
 #define X86_FEATURE_INTEL_PT	( 7*32+15) /* Intel Processor Trace */
+#define X86_FEATURE_AVX512_4VNNIW (7*32+16) /* AVX-512 Neural Network Instructions */
+#define X86_FEATURE_AVX512_4FMAPS (7*32+17) /* AVX-512 Multiply Accumulation Single precision */
 
 /* Virtualization flags: Linux defined, word 8 */
 #define X86_FEATURE_TPR_SHADOW  ( 8*32+ 0) /* Intel TPR Shadow */

^ permalink raw reply	[flat|nested] 539+ messages in thread

* [GIT PULL] x86 fixes
@ 2016-10-18 11:22 Ingo Molnar
  0 siblings, 0 replies; 539+ messages in thread
From: Ingo Molnar @ 2016-10-18 11:22 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Thomas Gleixner, H. Peter Anvin, Borislav Petkov,
	Peter Zijlstra, Andrew Morton

Linus,

Please pull the latest x86-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus

   # HEAD: 0e60439c22ec551e9b5780eba73ddfeb63ab63d9 Merge branch 'mm/pkeys' into x86/urgent, to pick up pkeys fix

Misc fixes, plus hw-enablement changes:

 - fix persistent RAM handling
 - remove pkeys warning
 - remove duplicate macro
 - fix debug warning in irq handler
 - Add new 'Knights Mill' CPU related constants and enable the perf bits

 Thanks,

	Ingo

------------------>
Dan Williams (1):
      x86/e820: Don't merge consecutive E820_PRAM ranges

Dave Jones (1):
      pkeys: Remove easily triggered WARN

Longpeng(Mike) (1):
      x86: Remove duplicate rtit status MSR macro

Piotr Luc (4):
      x86/cpu/intel: Add Knights Mill to Intel family
      perf/x86/intel: Add Knights Mill CPUID
      perf/x86/intel/rapl: Add Knights Mill CPUID
      perf/x86/intel/uncore: Add Knights Mill CPUID

Wanpeng Li (1):
      x86/smp: Add irq_enter/exit() in smp_reschedule_interrupt()


 arch/x86/events/intel/core.c        | 3 ++-
 arch/x86/events/intel/rapl.c        | 1 +
 arch/x86/events/intel/uncore.c      | 1 +
 arch/x86/include/asm/intel-family.h | 1 +
 arch/x86/include/asm/msr-index.h    | 1 -
 arch/x86/kernel/e820.c              | 2 +-
 arch/x86/kernel/smp.c               | 2 ++
 7 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index a3a9eb84b5cf..eab0915f5995 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -3898,6 +3898,7 @@ __init int intel_pmu_init(void)
 		break;
 
 	case INTEL_FAM6_XEON_PHI_KNL:
+	case INTEL_FAM6_XEON_PHI_KNM:
 		memcpy(hw_cache_event_ids,
 		       slm_hw_cache_event_ids, sizeof(hw_cache_event_ids));
 		memcpy(hw_cache_extra_regs,
@@ -3912,7 +3913,7 @@ __init int intel_pmu_init(void)
 		x86_pmu.flags |= PMU_FL_HAS_RSP_1;
 		x86_pmu.flags |= PMU_FL_NO_HT_SHARING;
 
-		pr_cont("Knights Landing events, ");
+		pr_cont("Knights Landing/Mill events, ");
 		break;
 
 	case INTEL_FAM6_SKYLAKE_MOBILE:
diff --git a/arch/x86/events/intel/rapl.c b/arch/x86/events/intel/rapl.c
index b0f0e835a770..0a535cea8ff3 100644
--- a/arch/x86/events/intel/rapl.c
+++ b/arch/x86/events/intel/rapl.c
@@ -763,6 +763,7 @@ static const struct x86_cpu_id rapl_cpu_match[] __initconst = {
 	X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_XEON_D, hsw_rapl_init),
 
 	X86_RAPL_MODEL_MATCH(INTEL_FAM6_XEON_PHI_KNL, knl_rapl_init),
+	X86_RAPL_MODEL_MATCH(INTEL_FAM6_XEON_PHI_KNM, knl_rapl_init),
 
 	X86_RAPL_MODEL_MATCH(INTEL_FAM6_SKYLAKE_MOBILE,  skl_rapl_init),
 	X86_RAPL_MODEL_MATCH(INTEL_FAM6_SKYLAKE_DESKTOP, skl_rapl_init),
diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c
index d9844cc74486..efca2685d876 100644
--- a/a