LKML Archive on lore.kernel.org
help / color / mirror / Atom feed
* [git pull] x86 fixes
@ 2008-03-11 16:12 Ingo Molnar
0 siblings, 0 replies; 547+ messages in thread
From: Ingo Molnar @ 2008-03-11 16:12 UTC (permalink / raw)
To: Linus Torvalds
Cc: linux-kernel, Andrew Morton, Thomas Gleixner, H. Peter Anvin
Linus, please pull the latest x86 git tree from:
git://git.kernel.org/pub/scm/linux/kernel/git/x86/linux-2.6-x86.git for-linus
the inclusion of the quicklist removal patch was a hard decision but we
tend to think that it's best way forward: re-tuning the quicklist
freeing thresholds would probably solve the regression too but has risks
of its own as the rules are not simple at all. (64-bit x86 does not use
quicklists either)
Note: the ia32 syscall restart fix from Roland has an impact on the ABI.
(it's a good fix IMO, but still it's a change.)
Ingo
------------------>
Ingo Molnar (1):
x86: ioremap, remove WARN_ON()
Roland McGrath (1):
x86: ia32 syscall restart fix
Thomas Gleixner (1):
x86: remove quicklists
arch/x86/Kconfig | 3 ---
arch/x86/kernel/ptrace.c | 9 ++++++++-
arch/x86/kernel/signal_64.c | 38 +++++++++++++++++++++++++++++++++-----
arch/x86/mm/ioremap.c | 2 --
arch/x86/mm/pgtable_32.c | 18 +++++++++---------
include/asm-x86/pgtable_32.h | 5 ++---
6 files changed, 52 insertions(+), 23 deletions(-)
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index f41c953..237fc12 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -66,9 +66,6 @@ config MMU
config ZONE_DMA
def_bool y
-config QUICKLIST
- def_bool X86_32
-
config SBUS
bool
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 8f64abe..d5904ee 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -1055,10 +1055,17 @@ static int putreg32(struct task_struct *child, unsigned regno, u32 value)
R32(esi, si);
R32(ebp, bp);
R32(eax, ax);
- R32(orig_eax, orig_ax);
R32(eip, ip);
R32(esp, sp);
+ case offsetof(struct user32, regs.orig_eax):
+ /*
+ * Sign-extend the value so that orig_eax = -1
+ * causes (long)orig_ax < 0 tests to fire correctly.
+ */
+ regs->orig_ax = (long) (s32) value;
+ break;
+
case offsetof(struct user32, regs.eflags):
return set_flags(child, value);
diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c
index 56b72fb..1c83e51 100644
--- a/arch/x86/kernel/signal_64.c
+++ b/arch/x86/kernel/signal_64.c
@@ -311,6 +311,35 @@ give_sigsegv:
}
/*
+ * Return -1L or the syscall number that @regs is executing.
+ */
+static long current_syscall(struct pt_regs *regs)
+{
+ /*
+ * We always sign-extend a -1 value being set here,
+ * so this is always either -1L or a syscall number.
+ */
+ return regs->orig_ax;
+}
+
+/*
+ * Return a value that is -EFOO if the system call in @regs->orig_ax
+ * returned an error. This only works for @regs from @current.
+ */
+static long current_syscall_ret(struct pt_regs *regs)
+{
+#ifdef CONFIG_IA32_EMULATION
+ if (test_thread_flag(TIF_IA32))
+ /*
+ * Sign-extend the value so (int)-EFOO becomes (long)-EFOO
+ * and will match correctly in comparisons.
+ */
+ return (int) regs->ax;
+#endif
+ return regs->ax;
+}
+
+/*
* OK, we're invoking a handler
*/
@@ -327,9 +356,9 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
#endif
/* Are we from a system call? */
- if ((long)regs->orig_ax >= 0) {
+ if (current_syscall(regs) >= 0) {
/* If so, check system call restarting.. */
- switch (regs->ax) {
+ switch (current_syscall_ret(regs)) {
case -ERESTART_RESTARTBLOCK:
case -ERESTARTNOHAND:
regs->ax = -EINTR;
@@ -426,10 +455,9 @@ static void do_signal(struct pt_regs *regs)
}
/* Did we come from a system call? */
- if ((long)regs->orig_ax >= 0) {
+ if (current_syscall(regs) >= 0) {
/* Restart the system call - no handlers present */
- long res = regs->ax;
- switch (res) {
+ switch (current_syscall_ret(regs)) {
case -ERESTARTNOHAND:
case -ERESTARTSYS:
case -ERESTARTNOINTR:
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index ac3c959..8fe576b 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -134,8 +134,6 @@ static void __iomem *__ioremap(unsigned long phys_addr, unsigned long size,
return NULL;
}
- WARN_ON_ONCE(page_is_ram(pfn));
-
switch (mode) {
case IOR_MODE_UNCACHED:
default:
diff --git a/arch/x86/mm/pgtable_32.c b/arch/x86/mm/pgtable_32.c
index 73aba71..2f9e9af 100644
--- a/arch/x86/mm/pgtable_32.c
+++ b/arch/x86/mm/pgtable_32.c
@@ -342,12 +342,16 @@ static void pgd_mop_up_pmds(struct mm_struct *mm, pgd_t *pgdp)
pgd_t *pgd_alloc(struct mm_struct *mm)
{
- pgd_t *pgd = quicklist_alloc(0, GFP_KERNEL, pgd_ctor);
+ pgd_t *pgd = (pgd_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
- mm->pgd = pgd; /* so that alloc_pd can use it */
+ /* so that alloc_pd can use it */
+ mm->pgd = pgd;
+ if (pgd)
+ pgd_ctor(pgd);
if (pgd && !pgd_prepopulate_pmd(mm, pgd)) {
- quicklist_free(0, pgd_dtor, pgd);
+ pgd_dtor(pgd);
+ free_page((unsigned long)pgd);
pgd = NULL;
}
@@ -357,12 +361,8 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
void pgd_free(struct mm_struct *mm, pgd_t *pgd)
{
pgd_mop_up_pmds(mm, pgd);
- quicklist_free(0, pgd_dtor, pgd);
-}
-
-void check_pgt_cache(void)
-{
- quicklist_trim(0, pgd_dtor, 25, 16);
+ pgd_dtor(pgd);
+ free_page((unsigned long)pgd);
}
void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte)
diff --git a/include/asm-x86/pgtable_32.h b/include/asm-x86/pgtable_32.h
index a842c72..4e6a0fc 100644
--- a/include/asm-x86/pgtable_32.h
+++ b/include/asm-x86/pgtable_32.h
@@ -26,10 +26,9 @@ struct mm_struct;
struct vm_area_struct;
extern pgd_t swapper_pg_dir[1024];
-extern struct kmem_cache *pmd_cache;
-void check_pgt_cache(void);
-static inline void pgtable_cache_init(void) {}
+static inline void pgtable_cache_init(void) { }
+static inline void check_pgt_cache(void) { }
void paging_init(void);
^ permalink raw reply related [flat|nested] 547+ messages in thread
* Re: [GIT PULL] x86 fixes
2023-09-22 10:33 [GIT PULL] " Ingo Molnar
@ 2023-09-22 20:19 ` pr-tracker-bot
0 siblings, 0 replies; 547+ messages in thread
From: pr-tracker-bot @ 2023-09-22 20:19 UTC (permalink / raw)
To: Ingo Molnar
Cc: Linus Torvalds, linux-kernel, Thomas Gleixner, Borislav Petkov,
Dave Hansen, Peter Zijlstra, Andrew Morton, H. Peter Anvin, x86
The pull request you sent on Fri, 22 Sep 2023 12:33:16 +0200:
> git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-2023-09-22
has been merged into torvalds/linux.git:
https://git.kernel.org/torvalds/c/e583bffeb8bc3a7b64455b14376afd5fad71d62f
Thank you!
--
Deet-doot-dot, I am a bot.
https://korg.docs.kernel.org/prtracker.html
^ permalink raw reply [flat|nested] 547+ messages in thread
* [GIT PULL] x86 fixes
@ 2023-09-22 10:33 Ingo Molnar
2023-09-22 20:19 ` pr-tracker-bot
0 siblings, 1 reply; 547+ messages in thread
From: Ingo Molnar @ 2023-09-22 10:33 UTC (permalink / raw)
To: Linus Torvalds
Cc: linux-kernel, Thomas Gleixner, Borislav Petkov, Dave Hansen,
Peter Zijlstra, Andrew Morton, H. Peter Anvin, x86
Linus,
Please pull the latest x86/urgent git tree from:
git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-2023-09-22
# HEAD: 509ff51ee652c41a277c2b439aea01a8f56a27b9 x86/shstk: Add warning for shadow stack double unmap
Misc x86 fixes:
- Fix a kexec bug,
- Fix an UML build bug,
- Fix a handful of SRSO related bugs,
- Fix a shadow stacks handling bug & robustify related code.
Thanks,
Ingo
------------------>
Josh Poimboeuf (4):
x86/srso: Fix srso_show_state() side effect
x86/srso: Set CPUID feature bits independently of bug or mitigation status
x86/srso: Don't probe microcode in a guest
x86/srso: Fix SBPB enablement for spec_rstack_overflow=off
Rick Edgecombe (3):
x86/shstk: Handle vfork clone failure correctly
x86/shstk: Remove useless clone error handling
x86/shstk: Add warning for shadow stack double unmap
Rik van Riel (1):
x86/mm, kexec, ima: Use memblock_free_late() from ima_free_kexec_buffer()
Vincent Whitchurch (1):
x86/asm: Fix build of UML with KASAN
arch/x86/include/asm/linkage.h | 7 +++++++
arch/x86/include/asm/mmu_context.h | 3 +--
arch/x86/include/asm/processor.h | 2 --
arch/x86/kernel/cpu/amd.c | 28 +++++++++-------------------
arch/x86/kernel/cpu/bugs.c | 17 +++--------------
arch/x86/kernel/process.c | 7 -------
arch/x86/kernel/setup.c | 8 ++------
arch/x86/kernel/shstk.c | 33 +++++++++++++++++++++++++++++++--
arch/x86/lib/memcpy_64.S | 2 +-
arch/x86/lib/memmove_64.S | 2 +-
arch/x86/lib/memset_64.S | 2 +-
11 files changed, 56 insertions(+), 55 deletions(-)
diff --git a/arch/x86/include/asm/linkage.h b/arch/x86/include/asm/linkage.h
index 5ff49fd67732..571fe4d2d232 100644
--- a/arch/x86/include/asm/linkage.h
+++ b/arch/x86/include/asm/linkage.h
@@ -105,6 +105,13 @@
CFI_POST_PADDING \
SYM_FUNC_END(__cfi_##name)
+/* UML needs to be able to override memcpy() and friends for KASAN. */
+#ifdef CONFIG_UML
+# define SYM_FUNC_ALIAS_MEMFUNC SYM_FUNC_ALIAS_WEAK
+#else
+# define SYM_FUNC_ALIAS_MEMFUNC SYM_FUNC_ALIAS
+#endif
+
/* SYM_TYPED_FUNC_START -- use for indirectly called globals, w/ CFI type */
#define SYM_TYPED_FUNC_START(name) \
SYM_TYPED_START(name, SYM_L_GLOBAL, SYM_F_ALIGN) \
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index 416901d406f8..8dac45a2c7fc 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -186,8 +186,7 @@ do { \
#else
#define deactivate_mm(tsk, mm) \
do { \
- if (!tsk->vfork_done) \
- shstk_free(tsk); \
+ shstk_free(tsk); \
load_gs_index(0); \
loadsegment(fs, 0); \
} while (0)
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 0086920cda06..a3669a7774ed 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -683,13 +683,11 @@ extern u16 get_llc_id(unsigned int cpu);
#ifdef CONFIG_CPU_SUP_AMD
extern u32 amd_get_nodes_per_socket(void);
extern u32 amd_get_highest_perf(void);
-extern bool cpu_has_ibpb_brtype_microcode(void);
extern void amd_clear_divider(void);
extern void amd_check_microcode(void);
#else
static inline u32 amd_get_nodes_per_socket(void) { return 0; }
static inline u32 amd_get_highest_perf(void) { return 0; }
-static inline bool cpu_has_ibpb_brtype_microcode(void) { return false; }
static inline void amd_clear_divider(void) { }
static inline void amd_check_microcode(void) { }
#endif
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index dd8379d84445..03ef962a6992 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -766,6 +766,15 @@ static void early_init_amd(struct cpuinfo_x86 *c)
if (cpu_has(c, X86_FEATURE_TOPOEXT))
smp_num_siblings = ((cpuid_ebx(0x8000001e) >> 8) & 0xff) + 1;
+
+ if (!cpu_has(c, X86_FEATURE_HYPERVISOR) && !cpu_has(c, X86_FEATURE_IBPB_BRTYPE)) {
+ if (c->x86 == 0x17 && boot_cpu_has(X86_FEATURE_AMD_IBPB))
+ setup_force_cpu_cap(X86_FEATURE_IBPB_BRTYPE);
+ else if (c->x86 >= 0x19 && !wrmsrl_safe(MSR_IA32_PRED_CMD, PRED_CMD_SBPB)) {
+ setup_force_cpu_cap(X86_FEATURE_IBPB_BRTYPE);
+ setup_force_cpu_cap(X86_FEATURE_SBPB);
+ }
+ }
}
static void init_amd_k8(struct cpuinfo_x86 *c)
@@ -1301,25 +1310,6 @@ void amd_check_microcode(void)
on_each_cpu(zenbleed_check_cpu, NULL, 1);
}
-bool cpu_has_ibpb_brtype_microcode(void)
-{
- switch (boot_cpu_data.x86) {
- /* Zen1/2 IBPB flushes branch type predictions too. */
- case 0x17:
- return boot_cpu_has(X86_FEATURE_AMD_IBPB);
- case 0x19:
- /* Poke the MSR bit on Zen3/4 to check its presence. */
- if (!wrmsrl_safe(MSR_IA32_PRED_CMD, PRED_CMD_SBPB)) {
- setup_force_cpu_cap(X86_FEATURE_SBPB);
- return true;
- } else {
- return false;
- }
- default:
- return false;
- }
-}
-
/*
* Issue a DIV 0/1 insn to clear any division data from previous DIV
* operations.
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index f081d26616ac..10499bcd4e39 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -2404,26 +2404,15 @@ early_param("spec_rstack_overflow", srso_parse_cmdline);
static void __init srso_select_mitigation(void)
{
- bool has_microcode;
+ bool has_microcode = boot_cpu_has(X86_FEATURE_IBPB_BRTYPE);
if (!boot_cpu_has_bug(X86_BUG_SRSO) || cpu_mitigations_off())
goto pred_cmd;
- /*
- * The first check is for the kernel running as a guest in order
- * for guests to verify whether IBPB is a viable mitigation.
- */
- has_microcode = boot_cpu_has(X86_FEATURE_IBPB_BRTYPE) || cpu_has_ibpb_brtype_microcode();
if (!has_microcode) {
pr_warn("IBPB-extending microcode not applied!\n");
pr_warn(SRSO_NOTICE);
} else {
- /*
- * Enable the synthetic (even if in a real CPUID leaf)
- * flags for guests.
- */
- setup_force_cpu_cap(X86_FEATURE_IBPB_BRTYPE);
-
/*
* Zen1/2 with SMT off aren't vulnerable after the right
* IBPB microcode has been applied.
@@ -2444,7 +2433,7 @@ static void __init srso_select_mitigation(void)
switch (srso_cmd) {
case SRSO_CMD_OFF:
- return;
+ goto pred_cmd;
case SRSO_CMD_MICROCODE:
if (has_microcode) {
@@ -2717,7 +2706,7 @@ static ssize_t srso_show_state(char *buf)
return sysfs_emit(buf, "%s%s\n",
srso_strings[srso_mitigation],
- (cpu_has_ibpb_brtype_microcode() ? "" : ", no microcode"));
+ boot_cpu_has(X86_FEATURE_IBPB_BRTYPE) ? "" : ", no microcode");
}
static ssize_t gds_show_state(char *buf)
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 9f0909142a0a..b6f4e8399fca 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -257,13 +257,6 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
if (!ret && unlikely(test_tsk_thread_flag(current, TIF_IO_BITMAP)))
io_bitmap_share(p);
- /*
- * If copy_thread() if failing, don't leak the shadow stack possibly
- * allocated in shstk_alloc_thread_stack() above.
- */
- if (ret)
- shstk_free(p);
-
return ret;
}
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index b9145a63da77..b098b1fa2470 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -358,15 +358,11 @@ static void __init add_early_ima_buffer(u64 phys_addr)
#if defined(CONFIG_HAVE_IMA_KEXEC) && !defined(CONFIG_OF_FLATTREE)
int __init ima_free_kexec_buffer(void)
{
- int rc;
-
if (!ima_kexec_buffer_size)
return -ENOENT;
- rc = memblock_phys_free(ima_kexec_buffer_phys,
- ima_kexec_buffer_size);
- if (rc)
- return rc;
+ memblock_free_late(ima_kexec_buffer_phys,
+ ima_kexec_buffer_size);
ima_kexec_buffer_phys = 0;
ima_kexec_buffer_size = 0;
diff --git a/arch/x86/kernel/shstk.c b/arch/x86/kernel/shstk.c
index fd689921a1db..59e15dd8d0f8 100644
--- a/arch/x86/kernel/shstk.c
+++ b/arch/x86/kernel/shstk.c
@@ -205,10 +205,21 @@ unsigned long shstk_alloc_thread_stack(struct task_struct *tsk, unsigned long cl
return 0;
/*
- * For CLONE_VM, except vfork, the child needs a separate shadow
+ * For CLONE_VFORK the child will share the parents shadow stack.
+ * Make sure to clear the internal tracking of the thread shadow
+ * stack so the freeing logic run for child knows to leave it alone.
+ */
+ if (clone_flags & CLONE_VFORK) {
+ shstk->base = 0;
+ shstk->size = 0;
+ return 0;
+ }
+
+ /*
+ * For !CLONE_VM the child will use a copy of the parents shadow
* stack.
*/
- if ((clone_flags & (CLONE_VFORK | CLONE_VM)) != CLONE_VM)
+ if (!(clone_flags & CLONE_VM))
return 0;
size = adjust_shstk_size(stack_size);
@@ -408,7 +419,25 @@ void shstk_free(struct task_struct *tsk)
if (!tsk->mm || tsk->mm != current->mm)
return;
+ /*
+ * If shstk->base is NULL, then this task is not managing its
+ * own shadow stack (CLONE_VFORK). So skip freeing it.
+ */
+ if (!shstk->base)
+ return;
+
+ /*
+ * shstk->base is NULL for CLONE_VFORK child tasks, and so is
+ * normal. But size = 0 on a shstk->base is not normal and
+ * indicated an attempt to free the thread shadow stack twice.
+ * Warn about it.
+ */
+ if (WARN_ON(!shstk->size))
+ return;
+
unmap_shadow_stack(shstk->base, shstk->size);
+
+ shstk->size = 0;
}
static int wrss_control(bool enable)
diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S
index 8f95fb267caa..76697df8dfd5 100644
--- a/arch/x86/lib/memcpy_64.S
+++ b/arch/x86/lib/memcpy_64.S
@@ -40,7 +40,7 @@ SYM_TYPED_FUNC_START(__memcpy)
SYM_FUNC_END(__memcpy)
EXPORT_SYMBOL(__memcpy)
-SYM_FUNC_ALIAS(memcpy, __memcpy)
+SYM_FUNC_ALIAS_MEMFUNC(memcpy, __memcpy)
EXPORT_SYMBOL(memcpy)
SYM_FUNC_START_LOCAL(memcpy_orig)
diff --git a/arch/x86/lib/memmove_64.S b/arch/x86/lib/memmove_64.S
index 0559b206fb11..ccdf3a597045 100644
--- a/arch/x86/lib/memmove_64.S
+++ b/arch/x86/lib/memmove_64.S
@@ -212,5 +212,5 @@ SYM_FUNC_START(__memmove)
SYM_FUNC_END(__memmove)
EXPORT_SYMBOL(__memmove)
-SYM_FUNC_ALIAS(memmove, __memmove)
+SYM_FUNC_ALIAS_MEMFUNC(memmove, __memmove)
EXPORT_SYMBOL(memmove)
diff --git a/arch/x86/lib/memset_64.S b/arch/x86/lib/memset_64.S
index 7c59a704c458..3d818b849ec6 100644
--- a/arch/x86/lib/memset_64.S
+++ b/arch/x86/lib/memset_64.S
@@ -40,7 +40,7 @@ SYM_FUNC_START(__memset)
SYM_FUNC_END(__memset)
EXPORT_SYMBOL(__memset)
-SYM_FUNC_ALIAS(memset, __memset)
+SYM_FUNC_ALIAS_MEMFUNC(memset, __memset)
EXPORT_SYMBOL(memset)
SYM_FUNC_START_LOCAL(memset_orig)
^ permalink raw reply related [flat|nested] 547+ messages in thread
* Re: [GIT PULL] x86 fixes
2023-09-17 17:44 Ingo Molnar
@ 2023-09-17 18:24 ` pr-tracker-bot
0 siblings, 0 replies; 547+ messages in thread
From: pr-tracker-bot @ 2023-09-17 18:24 UTC (permalink / raw)
To: Ingo Molnar
Cc: Linus Torvalds, linux-kernel, Thomas Gleixner, Borislav Petkov,
Dave Hansen, Peter Zijlstra, Andrew Morton, H. Peter Anvin, x86
The pull request you sent on Sun, 17 Sep 2023 19:44:16 +0200:
> git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-2023-09-17
has been merged into torvalds/linux.git:
https://git.kernel.org/torvalds/c/e789286468a9ea7acb5aae09a11bdfc7d46874ec
Thank you!
--
Deet-doot-dot, I am a bot.
https://korg.docs.kernel.org/prtracker.html
^ permalink raw reply [flat|nested] 547+ messages in thread
* [GIT PULL] x86 fixes
@ 2023-09-17 17:44 Ingo Molnar
2023-09-17 18:24 ` pr-tracker-bot
0 siblings, 1 reply; 547+ messages in thread
From: Ingo Molnar @ 2023-09-17 17:44 UTC (permalink / raw)
To: Linus Torvalds
Cc: linux-kernel, Thomas Gleixner, Borislav Petkov, Dave Hansen,
Peter Zijlstra, Andrew Morton, H. Peter Anvin, x86
Linus,
Please pull the latest x86/urgent git tree from:
git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-2023-09-17
# HEAD: 75b2f7e4c9e0fd750a5a27ca9736d1daa7a3762a x86/purgatory: Remove LTO flags
Misc fixes:
- Fix an UV boot crash,
- Skip spurious ENDBR generation on _THIS_IP_,
- Fix ENDBR use in putuser() asm methods,
- Fix corner case boot crashes on 5-level paging,
- and fix a false positive WARNING on LTO kernels.
Thanks,
Ingo
------------------>
Kirill A. Shutemov (1):
x86/boot/compressed: Reserve more memory for page tables
Peter Zijlstra (2):
x86/ibt: Suppress spurious ENDBR
x86/ibt: Avoid duplicate ENDBR in __put_user_nocheck*()
Song Liu (1):
x86/purgatory: Remove LTO flags
Steve Wahl (1):
x86/platform/uv: Use alternate source for socket to node data
arch/x86/boot/compressed/ident_map_64.c | 8 ++++++
arch/x86/include/asm/boot.h | 47 ++++++++++++++++++++++-----------
arch/x86/include/asm/linkage.h | 8 ++++++
arch/x86/kernel/apic/x2apic_uv_x.c | 11 ++++----
arch/x86/lib/putuser.S | 4 ---
arch/x86/purgatory/Makefile | 4 +++
include/linux/instruction_pointer.h | 5 ++++
7 files changed, 62 insertions(+), 25 deletions(-)
diff --git a/arch/x86/boot/compressed/ident_map_64.c b/arch/x86/boot/compressed/ident_map_64.c
index bcc956c17872..08f93b0401bb 100644
--- a/arch/x86/boot/compressed/ident_map_64.c
+++ b/arch/x86/boot/compressed/ident_map_64.c
@@ -59,6 +59,14 @@ static void *alloc_pgt_page(void *context)
return NULL;
}
+ /* Consumed more tables than expected? */
+ if (pages->pgt_buf_offset == BOOT_PGT_SIZE_WARN) {
+ debug_putstr("pgt_buf running low in " __FILE__ "\n");
+ debug_putstr("Need to raise BOOT_PGT_SIZE?\n");
+ debug_putaddr(pages->pgt_buf_offset);
+ debug_putaddr(pages->pgt_buf_size);
+ }
+
entry = pages->pgt_buf + pages->pgt_buf_offset;
pages->pgt_buf_offset += PAGE_SIZE;
diff --git a/arch/x86/include/asm/boot.h b/arch/x86/include/asm/boot.h
index 4ae14339cb8c..b3a7cfb0d99e 100644
--- a/arch/x86/include/asm/boot.h
+++ b/arch/x86/include/asm/boot.h
@@ -40,23 +40,40 @@
#ifdef CONFIG_X86_64
# define BOOT_STACK_SIZE 0x4000
-# define BOOT_INIT_PGT_SIZE (6*4096)
-# ifdef CONFIG_RANDOMIZE_BASE
/*
- * Assuming all cross the 512GB boundary:
- * 1 page for level4
- * (2+2)*4 pages for kernel, param, cmd_line, and randomized kernel
- * 2 pages for first 2M (video RAM: CONFIG_X86_VERBOSE_BOOTUP).
- * Total is 19 pages.
+ * Used by decompressor's startup_32() to allocate page tables for identity
+ * mapping of the 4G of RAM in 4-level paging mode:
+ * - 1 level4 table;
+ * - 1 level3 table;
+ * - 4 level2 table that maps everything with 2M pages;
+ *
+ * The additional level5 table needed for 5-level paging is allocated from
+ * trampoline_32bit memory.
*/
-# ifdef CONFIG_X86_VERBOSE_BOOTUP
-# define BOOT_PGT_SIZE (19*4096)
-# else /* !CONFIG_X86_VERBOSE_BOOTUP */
-# define BOOT_PGT_SIZE (17*4096)
-# endif
-# else /* !CONFIG_RANDOMIZE_BASE */
-# define BOOT_PGT_SIZE BOOT_INIT_PGT_SIZE
-# endif
+# define BOOT_INIT_PGT_SIZE (6*4096)
+
+/*
+ * Total number of page tables kernel_add_identity_map() can allocate,
+ * including page tables consumed by startup_32().
+ *
+ * Worst-case scenario:
+ * - 5-level paging needs 1 level5 table;
+ * - KASLR needs to map kernel, boot_params, cmdline and randomized kernel,
+ * assuming all of them cross 256T boundary:
+ * + 4*2 level4 table;
+ * + 4*2 level3 table;
+ * + 4*2 level2 table;
+ * - X86_VERBOSE_BOOTUP needs to map the first 2M (video RAM):
+ * + 1 level4 table;
+ * + 1 level3 table;
+ * + 1 level2 table;
+ * Total: 28 tables
+ *
+ * Add 4 spare table in case decompressor touches anything beyond what is
+ * accounted above. Warn if it happens.
+ */
+# define BOOT_PGT_SIZE_WARN (28*4096)
+# define BOOT_PGT_SIZE (32*4096)
#else /* !CONFIG_X86_64 */
# define BOOT_STACK_SIZE 0x1000
diff --git a/arch/x86/include/asm/linkage.h b/arch/x86/include/asm/linkage.h
index 97a3de7892d3..5ff49fd67732 100644
--- a/arch/x86/include/asm/linkage.h
+++ b/arch/x86/include/asm/linkage.h
@@ -8,6 +8,14 @@
#undef notrace
#define notrace __attribute__((no_instrument_function))
+#ifdef CONFIG_64BIT
+/*
+ * The generic version tends to create spurious ENDBR instructions under
+ * certain conditions.
+ */
+#define _THIS_IP_ ({ unsigned long __here; asm ("lea 0(%%rip), %0" : "=r" (__here)); __here; })
+#endif
+
#ifdef CONFIG_X86_32
#define asmlinkage CPP_ASMLINKAGE __attribute__((regparm(0)))
#endif /* CONFIG_X86_32 */
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index d9f5d7492f83..205cee567629 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -1533,7 +1533,7 @@ static void __init build_socket_tables(void)
{
struct uv_gam_range_entry *gre = uv_gre_table;
int nums, numn, nump;
- int cpu, i, lnid;
+ int i, lnid, apicid;
int minsock = _min_socket;
int maxsock = _max_socket;
int minpnode = _min_pnode;
@@ -1584,15 +1584,14 @@ static void __init build_socket_tables(void)
/* Set socket -> node values: */
lnid = NUMA_NO_NODE;
- for_each_possible_cpu(cpu) {
- int nid = cpu_to_node(cpu);
- int apicid, sockid;
+ for (apicid = 0; apicid < ARRAY_SIZE(__apicid_to_node); apicid++) {
+ int nid = __apicid_to_node[apicid];
+ int sockid;
- if (lnid == nid)
+ if ((nid == NUMA_NO_NODE) || (lnid == nid))
continue;
lnid = nid;
- apicid = per_cpu(x86_cpu_to_apicid, cpu);
sockid = apicid >> uv_cpuid.socketid_shift;
if (_socket_to_node[sockid - minsock] == SOCK_EMPTY)
diff --git a/arch/x86/lib/putuser.S b/arch/x86/lib/putuser.S
index 1451e0c4ae22..235bbda6fc82 100644
--- a/arch/x86/lib/putuser.S
+++ b/arch/x86/lib/putuser.S
@@ -56,7 +56,6 @@ SYM_FUNC_END(__put_user_1)
EXPORT_SYMBOL(__put_user_1)
SYM_FUNC_START(__put_user_nocheck_1)
- ENDBR
ASM_STAC
2: movb %al,(%_ASM_CX)
xor %ecx,%ecx
@@ -76,7 +75,6 @@ SYM_FUNC_END(__put_user_2)
EXPORT_SYMBOL(__put_user_2)
SYM_FUNC_START(__put_user_nocheck_2)
- ENDBR
ASM_STAC
4: movw %ax,(%_ASM_CX)
xor %ecx,%ecx
@@ -96,7 +94,6 @@ SYM_FUNC_END(__put_user_4)
EXPORT_SYMBOL(__put_user_4)
SYM_FUNC_START(__put_user_nocheck_4)
- ENDBR
ASM_STAC
6: movl %eax,(%_ASM_CX)
xor %ecx,%ecx
@@ -119,7 +116,6 @@ SYM_FUNC_END(__put_user_8)
EXPORT_SYMBOL(__put_user_8)
SYM_FUNC_START(__put_user_nocheck_8)
- ENDBR
ASM_STAC
9: mov %_ASM_AX,(%_ASM_CX)
#ifdef CONFIG_X86_32
diff --git a/arch/x86/purgatory/Makefile b/arch/x86/purgatory/Makefile
index c2a29be35c01..08aa0f25f12a 100644
--- a/arch/x86/purgatory/Makefile
+++ b/arch/x86/purgatory/Makefile
@@ -19,6 +19,10 @@ CFLAGS_sha256.o := -D__DISABLE_EXPORTS -D__NO_FORTIFY
# optimization flags.
KBUILD_CFLAGS := $(filter-out -fprofile-sample-use=% -fprofile-use=%,$(KBUILD_CFLAGS))
+# When LTO is enabled, llvm emits many text sections, which is not supported
+# by kexec. Remove -flto=* flags.
+KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_LTO),$(KBUILD_CFLAGS))
+
# When linking purgatory.ro with -r unresolved symbols are not checked,
# also link a purgatory.chk binary without -r to check for unresolved symbols.
PURGATORY_LDFLAGS := -e purgatory_start -z nodefaultlib
diff --git a/include/linux/instruction_pointer.h b/include/linux/instruction_pointer.h
index cda1f706eaeb..aa0b3ffea935 100644
--- a/include/linux/instruction_pointer.h
+++ b/include/linux/instruction_pointer.h
@@ -2,7 +2,12 @@
#ifndef _LINUX_INSTRUCTION_POINTER_H
#define _LINUX_INSTRUCTION_POINTER_H
+#include <asm/linkage.h>
+
#define _RET_IP_ (unsigned long)__builtin_return_address(0)
+
+#ifndef _THIS_IP_
#define _THIS_IP_ ({ __label__ __here; __here: (unsigned long)&&__here; })
+#endif
#endif /* _LINUX_INSTRUCTION_POINTER_H */
^ permalink raw reply related [flat|nested] 547+ messages in thread
* Re: [GIT PULL] x86 fixes
2023-09-10 16:26 ` [GIT PULL] x86 fixes Ingo Molnar
@ 2023-09-10 18:08 ` pr-tracker-bot
0 siblings, 0 replies; 547+ messages in thread
From: pr-tracker-bot @ 2023-09-10 18:08 UTC (permalink / raw)
To: Ingo Molnar
Cc: Linus Torvalds, linux-kernel, Thomas Gleixner, Borislav Petkov,
Dave Hansen, Peter Zijlstra, Andrew Morton, H. Peter Anvin, x86
The pull request you sent on Sun, 10 Sep 2023 18:26:30 +0200:
> git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-2023-09-10
has been merged into torvalds/linux.git:
https://git.kernel.org/torvalds/c/e56b2b6057996c5f48da518c79d6590f8bfaabf3
Thank you!
--
Deet-doot-dot, I am a bot.
https://korg.docs.kernel.org/prtracker.html
^ permalink raw reply [flat|nested] 547+ messages in thread
* [GIT PULL] x86 fixes
2023-09-02 10:24 [GIT PULL] x86 fix Ingo Molnar
@ 2023-09-10 16:26 ` Ingo Molnar
2023-09-10 18:08 ` pr-tracker-bot
0 siblings, 1 reply; 547+ messages in thread
From: Ingo Molnar @ 2023-09-10 16:26 UTC (permalink / raw)
To: Linus Torvalds
Cc: linux-kernel, Thomas Gleixner, Borislav Petkov, Dave Hansen,
Peter Zijlstra, Andrew Morton, H. Peter Anvin, x86
Linus,
Please pull the latest x86/urgent Git tree from:
git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-2023-09-10
# HEAD: 3d7d72a34e05b23e21bafc8bfb861e73c86b31f3 x86/sgx: Break up long non-preemptible delays in sgx_vepc_release()
Fix preemption delays in the SGX code, remove unnecessarily UAPI-exported code,
fix a ld.lld linker (in)compatibility quirk and make the x86 SMP init code a bit
more conservative to fix kexec() lockups.
Thanks,
Ingo
------------------>
Jack Wang (1):
x86/sgx: Break up long non-preemptible delays in sgx_vepc_release()
Song Liu (1):
x86/build: Fix linker fill bytes quirk/incompatibility for ld.lld
Thomas Gleixner (1):
x86/smp: Don't send INIT to non-present and non-booted CPUs
Thomas Huth (1):
x86: Remove the arch_calc_vm_prot_bits() macro from the UAPI
arch/x86/include/asm/mman.h | 15 +++++++++++++++
arch/x86/include/uapi/asm/mman.h | 8 --------
arch/x86/kernel/cpu/sgx/virt.c | 3 +++
arch/x86/kernel/smpboot.c | 2 +-
arch/x86/kernel/vmlinux.lds.S | 2 +-
| 1 -
6 files changed, 20 insertions(+), 11 deletions(-)
create mode 100644 arch/x86/include/asm/mman.h
diff --git a/arch/x86/include/asm/mman.h b/arch/x86/include/asm/mman.h
new file mode 100644
index 000000000000..12b820259b9f
--- /dev/null
+++ b/arch/x86/include/asm/mman.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_MMAN_H__
+#define __ASM_MMAN_H__
+
+#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
+#define arch_calc_vm_prot_bits(prot, key) ( \
+ ((key) & 0x1 ? VM_PKEY_BIT0 : 0) | \
+ ((key) & 0x2 ? VM_PKEY_BIT1 : 0) | \
+ ((key) & 0x4 ? VM_PKEY_BIT2 : 0) | \
+ ((key) & 0x8 ? VM_PKEY_BIT3 : 0))
+#endif
+
+#include <uapi/asm/mman.h>
+
+#endif /* __ASM_MMAN_H__ */
diff --git a/arch/x86/include/uapi/asm/mman.h b/arch/x86/include/uapi/asm/mman.h
index 8148bdddbd2c..46cdc941f958 100644
--- a/arch/x86/include/uapi/asm/mman.h
+++ b/arch/x86/include/uapi/asm/mman.h
@@ -5,14 +5,6 @@
#define MAP_32BIT 0x40 /* only give out 32bit addresses */
#define MAP_ABOVE4G 0x80 /* only map above 4GB */
-#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
-#define arch_calc_vm_prot_bits(prot, key) ( \
- ((key) & 0x1 ? VM_PKEY_BIT0 : 0) | \
- ((key) & 0x2 ? VM_PKEY_BIT1 : 0) | \
- ((key) & 0x4 ? VM_PKEY_BIT2 : 0) | \
- ((key) & 0x8 ? VM_PKEY_BIT3 : 0))
-#endif
-
/* Flags for map_shadow_stack(2) */
#define SHADOW_STACK_SET_TOKEN (1ULL << 0) /* Set up a restore token in the shadow stack */
diff --git a/arch/x86/kernel/cpu/sgx/virt.c b/arch/x86/kernel/cpu/sgx/virt.c
index c3e37eaec8ec..7aaa3652e31d 100644
--- a/arch/x86/kernel/cpu/sgx/virt.c
+++ b/arch/x86/kernel/cpu/sgx/virt.c
@@ -204,6 +204,7 @@ static int sgx_vepc_release(struct inode *inode, struct file *file)
continue;
xa_erase(&vepc->page_array, index);
+ cond_resched();
}
/*
@@ -222,6 +223,7 @@ static int sgx_vepc_release(struct inode *inode, struct file *file)
list_add_tail(&epc_page->list, &secs_pages);
xa_erase(&vepc->page_array, index);
+ cond_resched();
}
/*
@@ -243,6 +245,7 @@ static int sgx_vepc_release(struct inode *inode, struct file *file)
if (sgx_vepc_free_page(epc_page))
list_add_tail(&epc_page->list, &secs_pages);
+ cond_resched();
}
if (!list_empty(&secs_pages))
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index d7667a29acf3..4e45ff44aa07 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -1250,7 +1250,7 @@ bool smp_park_other_cpus_in_init(void)
if (this_cpu)
return false;
- for_each_present_cpu(cpu) {
+ for_each_cpu_and(cpu, &cpus_booted_once_mask, cpu_present_mask) {
if (cpu == this_cpu)
continue;
apicid = apic->cpu_present_to_apicid(cpu);
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index 83d41c2601d7..f15fb71f280e 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -156,7 +156,7 @@ SECTIONS
ALIGN_ENTRY_TEXT_END
*(.gnu.warning)
- } :text =0xcccc
+ } :text = 0xcccccccc
/* End of text section, which should occupy whole number of pages */
_etext = .;
--git a/scripts/headers_install.sh b/scripts/headers_install.sh
index afdddc82f02b..56d3c338d91d 100755
--- a/scripts/headers_install.sh
+++ b/scripts/headers_install.sh
@@ -81,7 +81,6 @@ arch/nios2/include/uapi/asm/swab.h:CONFIG_NIOS2_CI_SWAB_NO
arch/nios2/include/uapi/asm/swab.h:CONFIG_NIOS2_CI_SWAB_SUPPORT
arch/x86/include/uapi/asm/auxvec.h:CONFIG_IA32_EMULATION
arch/x86/include/uapi/asm/auxvec.h:CONFIG_X86_64
-arch/x86/include/uapi/asm/mman.h:CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
"
for c in $configs
^ permalink raw reply related [flat|nested] 547+ messages in thread
* Re: [GIT PULL] x86 fixes
2023-08-26 17:54 Ingo Molnar
@ 2023-08-26 18:08 ` pr-tracker-bot
0 siblings, 0 replies; 547+ messages in thread
From: pr-tracker-bot @ 2023-08-26 18:08 UTC (permalink / raw)
To: Ingo Molnar
Cc: Linus Torvalds, linux-kernel, Thomas Gleixner, Borislav Petkov,
Peter Zijlstra, Dave Hansen, x86, H. Peter Anvin
The pull request you sent on Sat, 26 Aug 2023 19:54:39 +0200:
> git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-2023-08-26
has been merged into torvalds/linux.git:
https://git.kernel.org/torvalds/c/28f20a19294da7df158dfca259d0e2b5866baaf9
Thank you!
--
Deet-doot-dot, I am a bot.
https://korg.docs.kernel.org/prtracker.html
^ permalink raw reply [flat|nested] 547+ messages in thread
* [GIT PULL] x86 fixes
@ 2023-08-26 17:54 Ingo Molnar
2023-08-26 18:08 ` pr-tracker-bot
0 siblings, 1 reply; 547+ messages in thread
From: Ingo Molnar @ 2023-08-26 17:54 UTC (permalink / raw)
To: Linus Torvalds
Cc: linux-kernel, Thomas Gleixner, Borislav Petkov, Peter Zijlstra,
Dave Hansen, x86, H. Peter Anvin
Linus,
Please pull the latest x86/urgent git tree from:
git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-2023-08-26
# HEAD: 2c66ca3949dc701da7f4c9407f2140ae425683a5 x86/fpu: Set X86_FEATURE_OSXSAVE feature after enabling OSXSAVE in CR4
Fix an FPU invalidation bug on exec(), and fix a performance
regression due to a missing setting of X86_FEATURE_OSXSAVE.
Thanks,
Ingo
------------------>
Feng Tang (1):
x86/fpu: Set X86_FEATURE_OSXSAVE feature after enabling OSXSAVE in CR4
Rick Edgecombe (1):
x86/fpu: Invalidate FPU state correctly on exec()
arch/x86/kernel/fpu/context.h | 3 +--
arch/x86/kernel/fpu/core.c | 2 +-
arch/x86/kernel/fpu/xstate.c | 7 +++++++
3 files changed, 9 insertions(+), 3 deletions(-)
diff --git a/arch/x86/kernel/fpu/context.h b/arch/x86/kernel/fpu/context.h
index af5cbdd9bd29..f6d856bd50bc 100644
--- a/arch/x86/kernel/fpu/context.h
+++ b/arch/x86/kernel/fpu/context.h
@@ -19,8 +19,7 @@
* FPU state for a task MUST let the rest of the kernel know that the
* FPU registers are no longer valid for this task.
*
- * Either one of these invalidation functions is enough. Invalidate
- * a resource you control: CPU if using the CPU for something else
+ * Invalidate a resource you control: CPU if using the CPU for something else
* (with preemption disabled), FPU for the current task, or a task that
* is prevented from running by the current task.
*/
diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index 1015af1ae562..98e507cc7d34 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -679,7 +679,7 @@ static void fpu_reset_fpregs(void)
struct fpu *fpu = ¤t->thread.fpu;
fpregs_lock();
- fpu__drop(fpu);
+ __fpu_invalidate_fpregs_state(fpu);
/*
* This does not change the actual hardware registers. It just
* resets the memory image and sets TIF_NEED_FPU_LOAD so a
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index 0bab497c9436..1afbc4866b10 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -882,6 +882,13 @@ void __init fpu__init_system_xstate(unsigned int legacy_size)
goto out_disable;
}
+ /*
+ * CPU capabilities initialization runs before FPU init. So
+ * X86_FEATURE_OSXSAVE is not set. Now that XSAVE is completely
+ * functional, set the feature bit so depending code works.
+ */
+ setup_force_cpu_cap(X86_FEATURE_OSXSAVE);
+
print_xstate_offset_size();
pr_info("x86/fpu: Enabled xstate features 0x%llx, context size is %d bytes, using '%s' format.\n",
fpu_kernel_cfg.max_features,
^ permalink raw reply related [flat|nested] 547+ messages in thread
* Re: [GIT PULL] x86 fixes
2023-02-11 8:59 Ingo Molnar
@ 2023-02-11 19:24 ` pr-tracker-bot
0 siblings, 0 replies; 547+ messages in thread
From: pr-tracker-bot @ 2023-02-11 19:24 UTC (permalink / raw)
To: Ingo Molnar
Cc: Linus Torvalds, linux-kernel, Thomas Gleixner, Borislav Petkov,
Peter Zijlstra, Andrew Morton, Dave Hansen
The pull request you sent on Sat, 11 Feb 2023 09:59:20 +0100:
> git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-2023-02-11
has been merged into torvalds/linux.git:
https://git.kernel.org/torvalds/c/47e9aa14ce5abca70d6584a8d8213707d197c38e
Thank you!
--
Deet-doot-dot, I am a bot.
https://korg.docs.kernel.org/prtracker.html
^ permalink raw reply [flat|nested] 547+ messages in thread
* [GIT PULL] x86 fixes
@ 2023-02-11 8:59 Ingo Molnar
2023-02-11 19:24 ` pr-tracker-bot
0 siblings, 1 reply; 547+ messages in thread
From: Ingo Molnar @ 2023-02-11 8:59 UTC (permalink / raw)
To: Linus Torvalds
Cc: linux-kernel, Thomas Gleixner, Borislav Petkov, Peter Zijlstra,
Andrew Morton, Dave Hansen
Linus,
Please pull the latest x86/urgent git tree from:
git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-2023-02-11
# HEAD: f545e8831e70065e127f903fc7aca09aa50422c7 x86/cpu: Add Lunar Lake M
Fix a kprobes bug, plus add a new Intel model number to the
upstream <asm/intel-family.h> header for drivers to use.
Thanks,
Ingo
------------------>
Kan Liang (1):
x86/cpu: Add Lunar Lake M
Nadav Amit (1):
x86/kprobes: Fix 1 byte conditional jump target
arch/x86/include/asm/intel-family.h | 2 ++
arch/x86/kernel/kprobes/core.c | 2 +-
2 files changed, 3 insertions(+), 1 deletion(-)
diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h
index 347707d459c6..cbaf174d8efd 100644
--- a/arch/x86/include/asm/intel-family.h
+++ b/arch/x86/include/asm/intel-family.h
@@ -123,6 +123,8 @@
#define INTEL_FAM6_METEORLAKE 0xAC
#define INTEL_FAM6_METEORLAKE_L 0xAA
+#define INTEL_FAM6_LUNARLAKE_M 0xBD
+
/* "Small Core" Processors (Atom/E-Core) */
#define INTEL_FAM6_ATOM_BONNELL 0x1C /* Diamondville, Pineview */
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
index b36f3c367cb2..695873c0f50b 100644
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -625,7 +625,7 @@ static int prepare_emulation(struct kprobe *p, struct insn *insn)
/* 1 byte conditional jump */
p->ainsn.emulate_op = kprobe_emulate_jcc;
p->ainsn.jcc.type = opcode & 0xf;
- p->ainsn.rel32 = *(char *)insn->immediate.bytes;
+ p->ainsn.rel32 = insn->immediate.value;
break;
case 0x0f:
opcode = insn->opcode.bytes[1];
^ permalink raw reply related [flat|nested] 547+ messages in thread
* Re: [GIT PULL] x86 fixes
2022-08-28 14:57 Ingo Molnar
@ 2022-08-28 18:18 ` pr-tracker-bot
0 siblings, 0 replies; 547+ messages in thread
From: pr-tracker-bot @ 2022-08-28 18:18 UTC (permalink / raw)
To: Ingo Molnar
Cc: Linus Torvalds, linux-kernel, Thomas Gleixner, Borislav Petkov,
Peter Zijlstra, Dave Hansen, H. Peter Anvin, x86
The pull request you sent on Sun, 28 Aug 2022 16:57:20 +0200:
> git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-2022-08-28
has been merged into torvalds/linux.git:
https://git.kernel.org/torvalds/c/2f23a7c914317ac0b2a7e2bbe48dc00213652f98
Thank you!
--
Deet-doot-dot, I am a bot.
https://korg.docs.kernel.org/prtracker.html
^ permalink raw reply [flat|nested] 547+ messages in thread
* [GIT PULL] x86 fixes
@ 2022-08-28 14:57 Ingo Molnar
2022-08-28 18:18 ` pr-tracker-bot
0 siblings, 1 reply; 547+ messages in thread
From: Ingo Molnar @ 2022-08-28 14:57 UTC (permalink / raw)
To: Linus Torvalds
Cc: linux-kernel, Thomas Gleixner, Borislav Petkov, Peter Zijlstra,
Dave Hansen, H. Peter Anvin, x86
Linus,
Please pull the latest x86/urgent git tree from:
git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-2022-08-28
# HEAD: 00da0cb385d05a89226e150a102eb49d8abb0359 Documentation/ABI: Mention retbleed vulnerability info file for sysfs
Misc fixes:
- Fix PAT on Xen, which caused i915 driver failures
- Fix compat INT 80 entry crash on Xen PV guests
- Fix 'MMIO Stale Data' mitigation status reporting on older Intel CPUs
- Fix RSB stuffing regressions
- Fix ORC unwinding on ftrace trampolines
- Add Intel Raptor Lake CPU model number
- Fix (work around) a SEV-SNP bootloader bug providing bogus values in
boot_params->cc_blob_address, by ignoring the value on !SEV-SNP bootups.
- Fix SEV-SNP early boot failure
- Fix the objtool list of noreturn functions and annotate snp_abort(),
which bug confused objtool on gcc-12.
- Fix the documentation for retbleed
Thanks,
Ingo
------------------>
Borislav Petkov (1):
x86/sev: Mark snp_abort() noreturn
Chen Zhongjin (1):
x86/unwind/orc: Unwind ftrace trampolines with correct ORC entry
Jan Beulich (1):
x86/PAT: Have pat_enabled() properly reflect state when running on Xen
Juergen Gross (1):
x86/entry: Fix entry_INT80_compat for Xen PV guests
Michael Roth (1):
x86/boot: Don't propagate uninitialized boot_params->cc_blob_address
Pawan Gupta (1):
x86/bugs: Add "unknown" reporting for MMIO Stale Data
Peter Zijlstra (2):
x86/nospec: Unwreck the RSB stuffing
x86/nospec: Fix i386 RSB stuffing
Salvatore Bonaccorso (1):
Documentation/ABI: Mention retbleed vulnerability info file for sysfs
Tom Lendacky (1):
x86/sev: Don't use cc_platform_has() for early SEV-SNP calls
Tony Luck (1):
x86/cpu: Add new Raptor Lake CPU model number
Documentation/ABI/testing/sysfs-devices-system-cpu | 1 +
.../hw-vuln/processor_mmio_stale_data.rst | 14 ++++
arch/x86/boot/compressed/misc.h | 12 ++-
arch/x86/boot/compressed/sev.c | 8 ++
arch/x86/entry/entry_64_compat.S | 2 +-
arch/x86/include/asm/cpufeatures.h | 5 +-
arch/x86/include/asm/intel-family.h | 2 +
arch/x86/include/asm/nospec-branch.h | 92 ++++++++++++----------
arch/x86/include/asm/sev.h | 2 +-
arch/x86/kernel/cpu/bugs.c | 14 +++-
arch/x86/kernel/cpu/common.c | 42 ++++++----
arch/x86/kernel/sev.c | 18 ++++-
arch/x86/kernel/unwind_orc.c | 15 ++--
arch/x86/mm/pat/memtype.c | 10 ++-
tools/objtool/check.c | 34 ++++----
15 files changed, 183 insertions(+), 88 deletions(-)
diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu
index 5bf61881f012..760c889b6cd1 100644
--- a/Documentation/ABI/testing/sysfs-devices-system-cpu
+++ b/Documentation/ABI/testing/sysfs-devices-system-cpu
@@ -523,6 +523,7 @@ What: /sys/devices/system/cpu/vulnerabilities
/sys/devices/system/cpu/vulnerabilities/tsx_async_abort
/sys/devices/system/cpu/vulnerabilities/itlb_multihit
/sys/devices/system/cpu/vulnerabilities/mmio_stale_data
+ /sys/devices/system/cpu/vulnerabilities/retbleed
Date: January 2018
Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org>
Description: Information about CPU vulnerabilities
diff --git a/Documentation/admin-guide/hw-vuln/processor_mmio_stale_data.rst b/Documentation/admin-guide/hw-vuln/processor_mmio_stale_data.rst
index 9393c50b5afc..c98fd11907cc 100644
--- a/Documentation/admin-guide/hw-vuln/processor_mmio_stale_data.rst
+++ b/Documentation/admin-guide/hw-vuln/processor_mmio_stale_data.rst
@@ -230,6 +230,20 @@ The possible values in this file are:
* - 'Mitigation: Clear CPU buffers'
- The processor is vulnerable and the CPU buffer clearing mitigation is
enabled.
+ * - 'Unknown: No mitigations'
+ - The processor vulnerability status is unknown because it is
+ out of Servicing period. Mitigation is not attempted.
+
+Definitions:
+------------
+
+Servicing period: The process of providing functional and security updates to
+Intel processors or platforms, utilizing the Intel Platform Update (IPU)
+process or other similar mechanisms.
+
+End of Servicing Updates (ESU): ESU is the date at which Intel will no
+longer provide Servicing, such as through IPU or other similar update
+processes. ESU dates will typically be aligned to end of quarter.
If the processor is vulnerable then the following information is appended to
the above information:
diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h
index 4910bf230d7b..62208ec04ca4 100644
--- a/arch/x86/boot/compressed/misc.h
+++ b/arch/x86/boot/compressed/misc.h
@@ -132,7 +132,17 @@ void snp_set_page_private(unsigned long paddr);
void snp_set_page_shared(unsigned long paddr);
void sev_prep_identity_maps(unsigned long top_level_pgt);
#else
-static inline void sev_enable(struct boot_params *bp) { }
+static inline void sev_enable(struct boot_params *bp)
+{
+ /*
+ * bp->cc_blob_address should only be set by boot/compressed kernel.
+ * Initialize it to 0 unconditionally (thus here in this stub too) to
+ * ensure that uninitialized values from buggy bootloaders aren't
+ * propagated.
+ */
+ if (bp)
+ bp->cc_blob_address = 0;
+}
static inline void sev_es_shutdown_ghcb(void) { }
static inline bool sev_es_check_ghcb_fault(unsigned long address)
{
diff --git a/arch/x86/boot/compressed/sev.c b/arch/x86/boot/compressed/sev.c
index 52f989f6acc2..c93930d5ccbd 100644
--- a/arch/x86/boot/compressed/sev.c
+++ b/arch/x86/boot/compressed/sev.c
@@ -276,6 +276,14 @@ void sev_enable(struct boot_params *bp)
struct msr m;
bool snp;
+ /*
+ * bp->cc_blob_address should only be set by boot/compressed kernel.
+ * Initialize it to 0 to ensure that uninitialized values from
+ * buggy bootloaders aren't propagated.
+ */
+ if (bp)
+ bp->cc_blob_address = 0;
+
/*
* Setup/preliminary detection of SNP. This will be sanity-checked
* against CPUID/MSR values later.
diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
index 682338e7e2a3..4dd19819053a 100644
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -311,7 +311,7 @@ SYM_CODE_START(entry_INT80_compat)
* Interrupts are off on entry.
*/
ASM_CLAC /* Do this early to minimize exposure */
- SWAPGS
+ ALTERNATIVE "swapgs", "", X86_FEATURE_XENPV
/*
* User tracing code (ptrace or signal handlers) might assume that
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 235dc85c91c3..ef4775c6db01 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -457,7 +457,8 @@
#define X86_BUG_ITLB_MULTIHIT X86_BUG(23) /* CPU may incur MCE during certain page attribute changes */
#define X86_BUG_SRBDS X86_BUG(24) /* CPU may leak RNG bits if not mitigated */
#define X86_BUG_MMIO_STALE_DATA X86_BUG(25) /* CPU is affected by Processor MMIO Stale Data vulnerabilities */
-#define X86_BUG_RETBLEED X86_BUG(26) /* CPU is affected by RETBleed */
-#define X86_BUG_EIBRS_PBRSB X86_BUG(27) /* EIBRS is vulnerable to Post Barrier RSB Predictions */
+#define X86_BUG_MMIO_UNKNOWN X86_BUG(26) /* CPU is too old and its MMIO Stale Data status is unknown */
+#define X86_BUG_RETBLEED X86_BUG(27) /* CPU is affected by RETBleed */
+#define X86_BUG_EIBRS_PBRSB X86_BUG(28) /* EIBRS is vulnerable to Post Barrier RSB Predictions */
#endif /* _ASM_X86_CPUFEATURES_H */
diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h
index def6ca121111..aeb38023a703 100644
--- a/arch/x86/include/asm/intel-family.h
+++ b/arch/x86/include/asm/intel-family.h
@@ -27,6 +27,7 @@
* _X - regular server parts
* _D - micro server parts
* _N,_P - other mobile parts
+ * _S - other client parts
*
* Historical OPTDIFFs:
*
@@ -112,6 +113,7 @@
#define INTEL_FAM6_RAPTORLAKE 0xB7
#define INTEL_FAM6_RAPTORLAKE_P 0xBA
+#define INTEL_FAM6_RAPTORLAKE_S 0xBF
/* "Small Core" Processors (Atom) */
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index e64fd20778b6..c936ce9f0c47 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -35,33 +35,56 @@
#define RSB_CLEAR_LOOPS 32 /* To forcibly overwrite all entries */
/*
+ * Common helper for __FILL_RETURN_BUFFER and __FILL_ONE_RETURN.
+ */
+#define __FILL_RETURN_SLOT \
+ ANNOTATE_INTRA_FUNCTION_CALL; \
+ call 772f; \
+ int3; \
+772:
+
+/*
+ * Stuff the entire RSB.
+ *
* Google experimented with loop-unrolling and this turned out to be
* the optimal version - two calls, each with their own speculation
* trap should their return address end up getting used, in a loop.
*/
-#define __FILL_RETURN_BUFFER(reg, nr, sp) \
- mov $(nr/2), reg; \
-771: \
- ANNOTATE_INTRA_FUNCTION_CALL; \
- call 772f; \
-773: /* speculation trap */ \
- UNWIND_HINT_EMPTY; \
- pause; \
- lfence; \
- jmp 773b; \
-772: \
- ANNOTATE_INTRA_FUNCTION_CALL; \
- call 774f; \
-775: /* speculation trap */ \
- UNWIND_HINT_EMPTY; \
- pause; \
- lfence; \
- jmp 775b; \
-774: \
- add $(BITS_PER_LONG/8) * 2, sp; \
- dec reg; \
- jnz 771b; \
- /* barrier for jnz misprediction */ \
+#ifdef CONFIG_X86_64
+#define __FILL_RETURN_BUFFER(reg, nr) \
+ mov $(nr/2), reg; \
+771: \
+ __FILL_RETURN_SLOT \
+ __FILL_RETURN_SLOT \
+ add $(BITS_PER_LONG/8) * 2, %_ASM_SP; \
+ dec reg; \
+ jnz 771b; \
+ /* barrier for jnz misprediction */ \
+ lfence;
+#else
+/*
+ * i386 doesn't unconditionally have LFENCE, as such it can't
+ * do a loop.
+ */
+#define __FILL_RETURN_BUFFER(reg, nr) \
+ .rept nr; \
+ __FILL_RETURN_SLOT; \
+ .endr; \
+ add $(BITS_PER_LONG/8) * nr, %_ASM_SP;
+#endif
+
+/*
+ * Stuff a single RSB slot.
+ *
+ * To mitigate Post-Barrier RSB speculation, one CALL instruction must be
+ * forced to retire before letting a RET instruction execute.
+ *
+ * On PBRSB-vulnerable CPUs, it is not safe for a RET to be executed
+ * before this point.
+ */
+#define __FILL_ONE_RETURN \
+ __FILL_RETURN_SLOT \
+ add $(BITS_PER_LONG/8), %_ASM_SP; \
lfence;
#ifdef __ASSEMBLY__
@@ -132,28 +155,15 @@
#endif
.endm
-.macro ISSUE_UNBALANCED_RET_GUARD
- ANNOTATE_INTRA_FUNCTION_CALL
- call .Lunbalanced_ret_guard_\@
- int3
-.Lunbalanced_ret_guard_\@:
- add $(BITS_PER_LONG/8), %_ASM_SP
- lfence
-.endm
-
/*
* A simpler FILL_RETURN_BUFFER macro. Don't make people use the CPP
* monstrosity above, manually.
*/
-.macro FILL_RETURN_BUFFER reg:req nr:req ftr:req ftr2
-.ifb \ftr2
- ALTERNATIVE "jmp .Lskip_rsb_\@", "", \ftr
-.else
- ALTERNATIVE_2 "jmp .Lskip_rsb_\@", "", \ftr, "jmp .Lunbalanced_\@", \ftr2
-.endif
- __FILL_RETURN_BUFFER(\reg,\nr,%_ASM_SP)
-.Lunbalanced_\@:
- ISSUE_UNBALANCED_RET_GUARD
+.macro FILL_RETURN_BUFFER reg:req nr:req ftr:req ftr2=ALT_NOT(X86_FEATURE_ALWAYS)
+ ALTERNATIVE_2 "jmp .Lskip_rsb_\@", \
+ __stringify(__FILL_RETURN_BUFFER(\reg,\nr)), \ftr, \
+ __stringify(__FILL_ONE_RETURN), \ftr2
+
.Lskip_rsb_\@:
.endm
diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h
index 4a23e52fe0ee..ebc271bb6d8e 100644
--- a/arch/x86/include/asm/sev.h
+++ b/arch/x86/include/asm/sev.h
@@ -195,7 +195,7 @@ void snp_set_memory_shared(unsigned long vaddr, unsigned int npages);
void snp_set_memory_private(unsigned long vaddr, unsigned int npages);
void snp_set_wakeup_secondary_cpu(void);
bool snp_init(struct boot_params *bp);
-void snp_abort(void);
+void __init __noreturn snp_abort(void);
int snp_issue_guest_request(u64 exit_code, struct snp_req_data *input, unsigned long *fw_err);
#else
static inline void sev_es_ist_enter(struct pt_regs *regs) { }
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 510d85261132..da7c361f47e0 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -433,7 +433,8 @@ static void __init mmio_select_mitigation(void)
u64 ia32_cap;
if (!boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA) ||
- cpu_mitigations_off()) {
+ boot_cpu_has_bug(X86_BUG_MMIO_UNKNOWN) ||
+ cpu_mitigations_off()) {
mmio_mitigation = MMIO_MITIGATION_OFF;
return;
}
@@ -538,6 +539,8 @@ static void __init md_clear_update_mitigation(void)
pr_info("TAA: %s\n", taa_strings[taa_mitigation]);
if (boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA))
pr_info("MMIO Stale Data: %s\n", mmio_strings[mmio_mitigation]);
+ else if (boot_cpu_has_bug(X86_BUG_MMIO_UNKNOWN))
+ pr_info("MMIO Stale Data: Unknown: No mitigations\n");
}
static void __init md_clear_select_mitigation(void)
@@ -2275,6 +2278,9 @@ static ssize_t tsx_async_abort_show_state(char *buf)
static ssize_t mmio_stale_data_show_state(char *buf)
{
+ if (boot_cpu_has_bug(X86_BUG_MMIO_UNKNOWN))
+ return sysfs_emit(buf, "Unknown: No mitigations\n");
+
if (mmio_mitigation == MMIO_MITIGATION_OFF)
return sysfs_emit(buf, "%s\n", mmio_strings[mmio_mitigation]);
@@ -2421,6 +2427,7 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr
return srbds_show_state(buf);
case X86_BUG_MMIO_STALE_DATA:
+ case X86_BUG_MMIO_UNKNOWN:
return mmio_stale_data_show_state(buf);
case X86_BUG_RETBLEED:
@@ -2480,7 +2487,10 @@ ssize_t cpu_show_srbds(struct device *dev, struct device_attribute *attr, char *
ssize_t cpu_show_mmio_stale_data(struct device *dev, struct device_attribute *attr, char *buf)
{
- return cpu_show_common(dev, attr, buf, X86_BUG_MMIO_STALE_DATA);
+ if (boot_cpu_has_bug(X86_BUG_MMIO_UNKNOWN))
+ return cpu_show_common(dev, attr, buf, X86_BUG_MMIO_UNKNOWN);
+ else
+ return cpu_show_common(dev, attr, buf, X86_BUG_MMIO_STALE_DATA);
}
ssize_t cpu_show_retbleed(struct device *dev, struct device_attribute *attr, char *buf)
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 64a73f415f03..3e508f239098 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1135,7 +1135,8 @@ static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c)
#define NO_SWAPGS BIT(6)
#define NO_ITLB_MULTIHIT BIT(7)
#define NO_SPECTRE_V2 BIT(8)
-#define NO_EIBRS_PBRSB BIT(9)
+#define NO_MMIO BIT(9)
+#define NO_EIBRS_PBRSB BIT(10)
#define VULNWL(vendor, family, model, whitelist) \
X86_MATCH_VENDOR_FAM_MODEL(vendor, family, model, whitelist)
@@ -1158,6 +1159,11 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = {
VULNWL(VORTEX, 6, X86_MODEL_ANY, NO_SPECULATION),
/* Intel Family 6 */
+ VULNWL_INTEL(TIGERLAKE, NO_MMIO),
+ VULNWL_INTEL(TIGERLAKE_L, NO_MMIO),
+ VULNWL_INTEL(ALDERLAKE, NO_MMIO),
+ VULNWL_INTEL(ALDERLAKE_L, NO_MMIO),
+
VULNWL_INTEL(ATOM_SALTWELL, NO_SPECULATION | NO_ITLB_MULTIHIT),
VULNWL_INTEL(ATOM_SALTWELL_TABLET, NO_SPECULATION | NO_ITLB_MULTIHIT),
VULNWL_INTEL(ATOM_SALTWELL_MID, NO_SPECULATION | NO_ITLB_MULTIHIT),
@@ -1176,9 +1182,9 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = {
VULNWL_INTEL(ATOM_AIRMONT_MID, NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT),
VULNWL_INTEL(ATOM_AIRMONT_NP, NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT),
- VULNWL_INTEL(ATOM_GOLDMONT, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT),
- VULNWL_INTEL(ATOM_GOLDMONT_D, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT),
- VULNWL_INTEL(ATOM_GOLDMONT_PLUS, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_EIBRS_PBRSB),
+ VULNWL_INTEL(ATOM_GOLDMONT, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO),
+ VULNWL_INTEL(ATOM_GOLDMONT_D, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO),
+ VULNWL_INTEL(ATOM_GOLDMONT_PLUS, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_EIBRS_PBRSB),
/*
* Technically, swapgs isn't serializing on AMD (despite it previously
@@ -1193,18 +1199,18 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = {
VULNWL_INTEL(ATOM_TREMONT_D, NO_ITLB_MULTIHIT | NO_EIBRS_PBRSB),
/* AMD Family 0xf - 0x12 */
- VULNWL_AMD(0x0f, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT),
- VULNWL_AMD(0x10, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT),
- VULNWL_AMD(0x11, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT),
- VULNWL_AMD(0x12, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT),
+ VULNWL_AMD(0x0f, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO),
+ VULNWL_AMD(0x10, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO),
+ VULNWL_AMD(0x11, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO),
+ VULNWL_AMD(0x12, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO),
/* FAMILY_ANY must be last, otherwise 0x0f - 0x12 matches won't work */
- VULNWL_AMD(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT),
- VULNWL_HYGON(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT),
+ VULNWL_AMD(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO),
+ VULNWL_HYGON(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO),
/* Zhaoxin Family 7 */
- VULNWL(CENTAUR, 7, X86_MODEL_ANY, NO_SPECTRE_V2 | NO_SWAPGS),
- VULNWL(ZHAOXIN, 7, X86_MODEL_ANY, NO_SPECTRE_V2 | NO_SWAPGS),
+ VULNWL(CENTAUR, 7, X86_MODEL_ANY, NO_SPECTRE_V2 | NO_SWAPGS | NO_MMIO),
+ VULNWL(ZHAOXIN, 7, X86_MODEL_ANY, NO_SPECTRE_V2 | NO_SWAPGS | NO_MMIO),
{}
};
@@ -1358,10 +1364,16 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
* Affected CPU list is generally enough to enumerate the vulnerability,
* but for virtualization case check for ARCH_CAP MSR bits also, VMM may
* not want the guest to enumerate the bug.
+ *
+ * Set X86_BUG_MMIO_UNKNOWN for CPUs that are neither in the blacklist,
+ * nor in the whitelist and also don't enumerate MSR ARCH_CAP MMIO bits.
*/
- if (cpu_matches(cpu_vuln_blacklist, MMIO) &&
- !arch_cap_mmio_immune(ia32_cap))
- setup_force_cpu_bug(X86_BUG_MMIO_STALE_DATA);
+ if (!arch_cap_mmio_immune(ia32_cap)) {
+ if (cpu_matches(cpu_vuln_blacklist, MMIO))
+ setup_force_cpu_bug(X86_BUG_MMIO_STALE_DATA);
+ else if (!cpu_matches(cpu_vuln_whitelist, NO_MMIO))
+ setup_force_cpu_bug(X86_BUG_MMIO_UNKNOWN);
+ }
if (!cpu_has(c, X86_FEATURE_BTC_NO)) {
if (cpu_matches(cpu_vuln_blacklist, RETBLEED) || (ia32_cap & ARCH_CAP_RSBA))
diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c
index 63dc626627a0..a428c62330d3 100644
--- a/arch/x86/kernel/sev.c
+++ b/arch/x86/kernel/sev.c
@@ -701,7 +701,13 @@ static void __init early_set_pages_state(unsigned long paddr, unsigned int npage
void __init early_snp_set_memory_private(unsigned long vaddr, unsigned long paddr,
unsigned int npages)
{
- if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
+ /*
+ * This can be invoked in early boot while running identity mapped, so
+ * use an open coded check for SNP instead of using cc_platform_has().
+ * This eliminates worries about jump tables or checking boot_cpu_data
+ * in the cc_platform_has() function.
+ */
+ if (!(sev_status & MSR_AMD64_SEV_SNP_ENABLED))
return;
/*
@@ -717,7 +723,13 @@ void __init early_snp_set_memory_private(unsigned long vaddr, unsigned long padd
void __init early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr,
unsigned int npages)
{
- if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
+ /*
+ * This can be invoked in early boot while running identity mapped, so
+ * use an open coded check for SNP instead of using cc_platform_has().
+ * This eliminates worries about jump tables or checking boot_cpu_data
+ * in the cc_platform_has() function.
+ */
+ if (!(sev_status & MSR_AMD64_SEV_SNP_ENABLED))
return;
/* Invalidate the memory pages before they are marked shared in the RMP table. */
@@ -2100,7 +2112,7 @@ bool __init snp_init(struct boot_params *bp)
return true;
}
-void __init snp_abort(void)
+void __init __noreturn snp_abort(void)
{
sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SNP_UNSUPPORTED);
}
diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c
index 38185aedf7d1..0ea57da92940 100644
--- a/arch/x86/kernel/unwind_orc.c
+++ b/arch/x86/kernel/unwind_orc.c
@@ -93,22 +93,27 @@ static struct orc_entry *orc_find(unsigned long ip);
static struct orc_entry *orc_ftrace_find(unsigned long ip)
{
struct ftrace_ops *ops;
- unsigned long caller;
+ unsigned long tramp_addr, offset;
ops = ftrace_ops_trampoline(ip);
if (!ops)
return NULL;
+ /* Set tramp_addr to the start of the code copied by the trampoline */
if (ops->flags & FTRACE_OPS_FL_SAVE_REGS)
- caller = (unsigned long)ftrace_regs_call;
+ tramp_addr = (unsigned long)ftrace_regs_caller;
else
- caller = (unsigned long)ftrace_call;
+ tramp_addr = (unsigned long)ftrace_caller;
+
+ /* Now place tramp_addr to the location within the trampoline ip is at */
+ offset = ip - ops->trampoline;
+ tramp_addr += offset;
/* Prevent unlikely recursion */
- if (ip == caller)
+ if (ip == tramp_addr)
return NULL;
- return orc_find(caller);
+ return orc_find(tramp_addr);
}
#else
static struct orc_entry *orc_ftrace_find(unsigned long ip)
diff --git a/arch/x86/mm/pat/memtype.c b/arch/x86/mm/pat/memtype.c
index d5ef64ddd35e..66a209f7eb86 100644
--- a/arch/x86/mm/pat/memtype.c
+++ b/arch/x86/mm/pat/memtype.c
@@ -62,6 +62,7 @@
static bool __read_mostly pat_bp_initialized;
static bool __read_mostly pat_disabled = !IS_ENABLED(CONFIG_X86_PAT);
+static bool __initdata pat_force_disabled = !IS_ENABLED(CONFIG_X86_PAT);
static bool __read_mostly pat_bp_enabled;
static bool __read_mostly pat_cm_initialized;
@@ -86,6 +87,7 @@ void pat_disable(const char *msg_reason)
static int __init nopat(char *str)
{
pat_disable("PAT support disabled via boot option.");
+ pat_force_disabled = true;
return 0;
}
early_param("nopat", nopat);
@@ -272,7 +274,7 @@ static void pat_ap_init(u64 pat)
wrmsrl(MSR_IA32_CR_PAT, pat);
}
-void init_cache_modes(void)
+void __init init_cache_modes(void)
{
u64 pat = 0;
@@ -313,6 +315,12 @@ void init_cache_modes(void)
*/
pat = PAT(0, WB) | PAT(1, WT) | PAT(2, UC_MINUS) | PAT(3, UC) |
PAT(4, WB) | PAT(5, WT) | PAT(6, UC_MINUS) | PAT(7, UC);
+ } else if (!pat_force_disabled && cpu_feature_enabled(X86_FEATURE_HYPERVISOR)) {
+ /*
+ * Clearly PAT is enabled underneath. Allow pat_enabled() to
+ * reflect this.
+ */
+ pat_bp_enabled = true;
}
__init_cache_modes(pat);
diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index 0cec74da7ffe..ad51689dfb41 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -162,32 +162,34 @@ static bool __dead_end_function(struct objtool_file *file, struct symbol *func,
/*
* Unfortunately these have to be hard coded because the noreturn
- * attribute isn't provided in ELF data.
+ * attribute isn't provided in ELF data. Keep 'em sorted.
*/
static const char * const global_noreturns[] = {
+ "__invalid_creds",
+ "__module_put_and_kthread_exit",
+ "__reiserfs_panic",
"__stack_chk_fail",
- "panic",
+ "__ubsan_handle_builtin_unreachable",
+ "cpu_bringup_and_idle",
+ "cpu_startup_entry",
"do_exit",
+ "do_group_exit",
"do_task_dead",
- "kthread_exit",
- "make_task_dead",
- "__module_put_and_kthread_exit",
+ "ex_handler_msr_mce",
+ "fortify_panic",
"kthread_complete_and_exit",
- "__reiserfs_panic",
+ "kthread_exit",
+ "kunit_try_catch_throw",
"lbug_with_loc",
- "fortify_panic",
- "usercopy_abort",
"machine_real_restart",
+ "make_task_dead",
+ "panic",
"rewind_stack_and_make_dead",
- "kunit_try_catch_throw",
- "xen_start_kernel",
- "cpu_bringup_and_idle",
- "do_group_exit",
+ "sev_es_terminate",
+ "snp_abort",
"stop_this_cpu",
- "__invalid_creds",
- "cpu_startup_entry",
- "__ubsan_handle_builtin_unreachable",
- "ex_handler_msr_mce",
+ "usercopy_abort",
+ "xen_start_kernel",
};
if (!func)
^ permalink raw reply related [flat|nested] 547+ messages in thread
* Re: [GIT PULL] x86 fixes
2022-08-06 19:29 Ingo Molnar
@ 2022-08-07 0:50 ` pr-tracker-bot
0 siblings, 0 replies; 547+ messages in thread
From: pr-tracker-bot @ 2022-08-07 0:50 UTC (permalink / raw)
To: Ingo Molnar
Cc: Linus Torvalds, linux-kernel, Thomas Gleixner, Borislav Petkov,
Peter Zijlstra, Dave Hansen, x86, H. Peter Anvin
The pull request you sent on Sat, 6 Aug 2022 21:29:39 +0200:
> git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-2022-08-06
has been merged into torvalds/linux.git:
https://git.kernel.org/torvalds/c/1612c382ffbdf1f673caec76502b1c00e6d35363
Thank you!
--
Deet-doot-dot, I am a bot.
https://korg.docs.kernel.org/prtracker.html
^ permalink raw reply [flat|nested] 547+ messages in thread
* [GIT PULL] x86 fixes
@ 2022-08-06 19:29 Ingo Molnar
2022-08-07 0:50 ` pr-tracker-bot
0 siblings, 1 reply; 547+ messages in thread
From: Ingo Molnar @ 2022-08-06 19:29 UTC (permalink / raw)
To: Linus Torvalds
Cc: linux-kernel, Thomas Gleixner, Borislav Petkov, Peter Zijlstra,
Dave Hansen, x86, H. Peter Anvin
Linus,
Please pull the latest x86/urgent git tree from:
git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-2022-08-06
# HEAD: de979c83574abf6e78f3fa65b716515c91b2613d x86/entry: Build thunk_$(BITS) only if CONFIG_PREEMPTION=y
Misc fixes:
- an old(er) binutils build fix,
- a new-GCC build fix,
- and a kexec boot environment fix.
NOTE, there's a new conflict in arch/x86/um/Makefile, due to these two
commits interacting:
upstream: # 5b301409e8bc UML: add support for KASAN under x86_64
# adds two new targets to the single subarch-y line
x86/urgent: # de979c83574a x86/entry: Build thunk_$(BITS) only if CONFIG_PREEMPTION=y
# splits the ../entry/thunk_64.o target from the subarch-y line
it's a context conflict only, my resolution was:
subarch-y = ../lib/csum-partial_64.o ../lib/memcpy_64.o ../lib/memmove_64.o ../lib/memset_64.o
subarch-$(CONFIG_PREEMPTION) += ../entry/thunk_64.o
Thanks,
Ingo
------------------>
Andrea Righi (1):
x86/entry: Build thunk_$(BITS) only if CONFIG_PREEMPTION=y
Chenyi Qiang (1):
x86/bus_lock: Don't assume the init value of DEBUGCTLMSR.BUS_LOCK_DETECT to be zero
Siddh Raman Pant (1):
x86/numa: Use cpumask_available instead of hardcoded NULL check
arch/x86/entry/Makefile | 3 ++-
arch/x86/entry/thunk_32.S | 2 --
arch/x86/entry/thunk_64.S | 4 ----
arch/x86/kernel/cpu/intel.c | 27 ++++++++++++++-------------
arch/x86/mm/numa.c | 4 ++--
arch/x86/um/Makefile | 3 ++-
6 files changed, 20 insertions(+), 23 deletions(-)
^ permalink raw reply [flat|nested] 547+ messages in thread
* Re: [GIT PULL] x86 fixes
2021-03-28 10:44 Ingo Molnar
@ 2021-03-28 19:22 ` pr-tracker-bot
0 siblings, 0 replies; 547+ messages in thread
From: pr-tracker-bot @ 2021-03-28 19:22 UTC (permalink / raw)
To: Ingo Molnar
Cc: Linus Torvalds, linux-kernel, Thomas Gleixner, Borislav Petkov,
Peter Zijlstra, Andrew Morton
The pull request you sent on Sun, 28 Mar 2021 12:44:42 +0200:
> git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-2021-03-28
has been merged into torvalds/linux.git:
https://git.kernel.org/torvalds/c/36a14638f7c06546717cc1316fcfee6da42b98cc
Thank you!
--
Deet-doot-dot, I am a bot.
https://korg.docs.kernel.org/prtracker.html
^ permalink raw reply [flat|nested] 547+ messages in thread
* [GIT PULL] x86 fixes
@ 2021-03-28 10:44 Ingo Molnar
2021-03-28 19:22 ` pr-tracker-bot
0 siblings, 1 reply; 547+ messages in thread
From: Ingo Molnar @ 2021-03-28 10:44 UTC (permalink / raw)
To: Linus Torvalds
Cc: linux-kernel, Thomas Gleixner, Borislav Petkov, Peter Zijlstra,
Andrew Morton
Linus,
Please pull the latest x86/urgent git tree from:
git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-2021-03-28
# HEAD: 9fcb51c14da2953de585c5c6e50697b8a6e91a7b x86/build: Turn off -fcf-protection for realmode targets
Two fixes:
- Fix build failure on Ubuntu with new GCC packages that turn on -fcf-protection
- Fix SME memory encryption PTE encoding bug - AFAICT the code worked on
4K page sizes (level 1) but had the wrong shift at higher page level orders
(level 2 and higher).
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Thanks,
Ingo
------------------>
Arnd Bergmann (1):
x86/build: Turn off -fcf-protection for realmode targets
Isaku Yamahata (1):
x86/mem_encrypt: Correct physical address calculation in __set_clr_pte_enc()
arch/x86/Makefile | 2 +-
arch/x86/mm/mem_encrypt.c | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 2d6d5a28c3bf..9a85eae37b17 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -27,7 +27,7 @@ endif
REALMODE_CFLAGS := -m16 -g -Os -DDISABLE_BRANCH_PROFILING \
-Wall -Wstrict-prototypes -march=i386 -mregparm=3 \
-fno-strict-aliasing -fomit-frame-pointer -fno-pic \
- -mno-mmx -mno-sse
+ -mno-mmx -mno-sse $(call cc-option,-fcf-protection=none)
REALMODE_CFLAGS += -ffreestanding
REALMODE_CFLAGS += -fno-stack-protector
diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c
index 4b01f7dbaf30..ae78cef79980 100644
--- a/arch/x86/mm/mem_encrypt.c
+++ b/arch/x86/mm/mem_encrypt.c
@@ -262,7 +262,7 @@ static void __init __set_clr_pte_enc(pte_t *kpte, int level, bool enc)
if (pgprot_val(old_prot) == pgprot_val(new_prot))
return;
- pa = pfn << page_level_shift(level);
+ pa = pfn << PAGE_SHIFT;
size = page_level_size(level);
/*
^ permalink raw reply related [flat|nested] 547+ messages in thread
* Re: [GIT PULL] x86 fixes
2020-10-11 18:00 ` Linus Torvalds
@ 2020-10-11 20:00 ` Thomas Gleixner
0 siblings, 0 replies; 547+ messages in thread
From: Thomas Gleixner @ 2020-10-11 20:00 UTC (permalink / raw)
To: Linus Torvalds, Ingo Molnar
Cc: Linux Kernel Mailing List, Borislav Petkov, Peter Zijlstra,
Andrew Morton
Linus,
On Sun, Oct 11 2020 at 11:00, Linus Torvalds wrote:
> On Sun, Oct 11, 2020 at 1:09 AM Ingo Molnar <mingo@kernel.org> wrote:
>>
>> - Fix a (hopefully final) IRQ state tracking bug vs. MCE handling
>
> Why is the nmi_enter/nmi_exit thing not a problem on non-x86 architectures?
>
> Put another way: x86 does extra work to track IRQ state across NMI.
> What makes x86 special? It _feels_ to me like everybody should do
> that? No?
>
> Please tell me what I've missed..
Not a lot. The nmi related tracking plus the entry/exit inconsistencies
which we identified and fixed over the past couple of releases in x86
apply pretty much to all of arch/* in one way or the other and of course
in different flavours.
This is worked on by architecture people and will hopefully be solved at
some point by moving all architectures to the generic entry code.
Thanks,
tglx
^ permalink raw reply [flat|nested] 547+ messages in thread
* Re: [GIT PULL] x86 fixes
2020-10-11 8:08 Ingo Molnar
2020-10-11 18:00 ` Linus Torvalds
@ 2020-10-11 18:23 ` pr-tracker-bot
1 sibling, 0 replies; 547+ messages in thread
From: pr-tracker-bot @ 2020-10-11 18:23 UTC (permalink / raw)
To: Ingo Molnar
Cc: Linus Torvalds, linux-kernel, Thomas Gleixner, Borislav Petkov,
Peter Zijlstra, Andrew Morton
The pull request you sent on Sun, 11 Oct 2020 10:08:59 +0200:
> git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-2020-10-11
has been merged into torvalds/linux.git:
https://git.kernel.org/torvalds/c/c120ec12e216225f7536cc1b924fe428ff64b5bd
Thank you!
--
Deet-doot-dot, I am a bot.
https://korg.docs.kernel.org/prtracker.html
^ permalink raw reply [flat|nested] 547+ messages in thread
* Re: [GIT PULL] x86 fixes
2020-10-11 8:08 Ingo Molnar
@ 2020-10-11 18:00 ` Linus Torvalds
2020-10-11 20:00 ` Thomas Gleixner
2020-10-11 18:23 ` pr-tracker-bot
1 sibling, 1 reply; 547+ messages in thread
From: Linus Torvalds @ 2020-10-11 18:00 UTC (permalink / raw)
To: Ingo Molnar
Cc: Linux Kernel Mailing List, Thomas Gleixner, Borislav Petkov,
Peter Zijlstra, Andrew Morton
On Sun, Oct 11, 2020 at 1:09 AM Ingo Molnar <mingo@kernel.org> wrote:
>
> - Fix a (hopefully final) IRQ state tracking bug vs. MCE handling
Why is the nmi_enter/nmi_exit thing not a problem on non-x86 architectures?
Put another way: x86 does extra work to track IRQ state across NMI.
What makes x86 special? It _feels_ to me like everybody should do
that? No?
Please tell me what I've missed..
Linus
^ permalink raw reply [flat|nested] 547+ messages in thread
* [GIT PULL] x86 fixes
@ 2020-10-11 8:08 Ingo Molnar
2020-10-11 18:00 ` Linus Torvalds
2020-10-11 18:23 ` pr-tracker-bot
0 siblings, 2 replies; 547+ messages in thread
From: Ingo Molnar @ 2020-10-11 8:08 UTC (permalink / raw)
To: Linus Torvalds
Cc: linux-kernel, Thomas Gleixner, Borislav Petkov, Peter Zijlstra,
Andrew Morton
Linus,
Please pull the latest x86/urgent git tree from:
git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-2020-10-11
# HEAD: 0c7689830e907668288a1a1da84dca66dbdb4728 Documentation/x86: Fix incorrect references to zero-page.txt
Two fixes:
- Fix a (hopefully final) IRQ state tracking bug vs. MCE handling
- Fix a documentation link
Thanks,
Ingo
------------------>
Heinrich Schuchardt (1):
Documentation/x86: Fix incorrect references to zero-page.txt
Thomas Gleixner (1):
x86/mce: Use idtentry_nmi_enter/exit()
Documentation/x86/boot.rst | 6 +++---
arch/x86/kernel/cpu/mce/core.c | 6 ++++--
2 files changed, 7 insertions(+), 5 deletions(-)
diff --git a/Documentation/x86/boot.rst b/Documentation/x86/boot.rst
index 7fafc7ac00d7..abb9fc164657 100644
--- a/Documentation/x86/boot.rst
+++ b/Documentation/x86/boot.rst
@@ -1342,8 +1342,8 @@ follow::
In addition to read/modify/write the setup header of the struct
boot_params as that of 16-bit boot protocol, the boot loader should
-also fill the additional fields of the struct boot_params as that
-described in zero-page.txt.
+also fill the additional fields of the struct boot_params as
+described in chapter :doc:`zero-page`.
After setting up the struct boot_params, the boot loader can load the
32/64-bit kernel in the same way as that of 16-bit boot protocol.
@@ -1379,7 +1379,7 @@ can be calculated as follows::
In addition to read/modify/write the setup header of the struct
boot_params as that of 16-bit boot protocol, the boot loader should
also fill the additional fields of the struct boot_params as described
-in zero-page.txt.
+in chapter :doc:`zero-page`.
After setting up the struct boot_params, the boot loader can load
64-bit kernel in the same way as that of 16-bit boot protocol, but
diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index f43a78bde670..fc4f8c04bdb5 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -1904,6 +1904,8 @@ void (*machine_check_vector)(struct pt_regs *) = unexpected_machine_check;
static __always_inline void exc_machine_check_kernel(struct pt_regs *regs)
{
+ bool irq_state;
+
WARN_ON_ONCE(user_mode(regs));
/*
@@ -1914,7 +1916,7 @@ static __always_inline void exc_machine_check_kernel(struct pt_regs *regs)
mce_check_crashing_cpu())
return;
- nmi_enter();
+ irq_state = idtentry_enter_nmi(regs);
/*
* The call targets are marked noinstr, but objtool can't figure
* that out because it's an indirect call. Annotate it.
@@ -1925,7 +1927,7 @@ static __always_inline void exc_machine_check_kernel(struct pt_regs *regs)
if (regs->flags & X86_EFLAGS_IF)
trace_hardirqs_on_prepare();
instrumentation_end();
- nmi_exit();
+ idtentry_exit_nmi(regs, irq_state);
}
static __always_inline void exc_machine_check_user(struct pt_regs *regs)
^ permalink raw reply related [flat|nested] 547+ messages in thread
* Re: [GIT PULL] x86 fixes
2020-09-06 8:15 Ingo Molnar
@ 2020-09-06 19:14 ` pr-tracker-bot
0 siblings, 0 replies; 547+ messages in thread
From: pr-tracker-bot @ 2020-09-06 19:14 UTC (permalink / raw)
To: Ingo Molnar
Cc: Linus Torvalds, linux-kernel, Thomas Gleixner, Borislav Petkov,
Peter Zijlstra, Andrew Morton
The pull request you sent on Sun, 6 Sep 2020 10:15:15 +0200:
> git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-2020-09-06
has been merged into torvalds/linux.git:
https://git.kernel.org/torvalds/c/015b3155c46a089f623c8a2e794ffad84143565d
Thank you!
--
Deet-doot-dot, I am a bot.
https://korg.docs.kernel.org/prtracker.html
^ permalink raw reply [flat|nested] 547+ messages in thread
* [GIT PULL] x86 fixes
@ 2020-09-06 8:15 Ingo Molnar
2020-09-06 19:14 ` pr-tracker-bot
0 siblings, 1 reply; 547+ messages in thread
From: Ingo Molnar @ 2020-09-06 8:15 UTC (permalink / raw)
To: Linus Torvalds
Cc: linux-kernel, Thomas Gleixner, Borislav Petkov, Peter Zijlstra,
Andrew Morton
Linus,
Please pull the latest x86/urgent git tree from:
git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-2020-09-06
# HEAD: 4facb95b7adaf77e2da73aafb9ba60996fe42a12 x86/entry: Unbreak 32bit fast syscall
Misc fixes:
- Fix more generic entry code ABI fallout
- Fix debug register handling bugs
- Fix vmalloc mappings on 32-bit kernels
- Fix kprobes instrumentation output on 32-bit kernels
- Fix over-eager WARN_ON_ONCE() on !SMAP hardware
- Fix NUMA debugging
- Fix Clang related crash on !RETPOLINE kernels
The most complex fixes are only a few days old and some haven't seen
-next yet, but I didn't think we should delay them.
out-of-topic modifications in x86-urgent-2020-09-06:
------------------------------------------------------
include/linux/entry-common.h # 4facb95b7ada: x86/entry: Unbreak 32bit fas
kernel/entry/common.c # 4facb95b7ada: x86/entry: Unbreak 32bit fas
Thanks,
Ingo
------------------>
Andy Lutomirski (1):
x86/debug: Allow a single level of #DB recursion
Arvind Sankar (1):
x86/cmdline: Disable jump tables for cmdline.c
Huang Ying (1):
x86, fakenuma: Fix invalid starting node ID
Joerg Roedel (1):
x86/mm/32: Bring back vmalloc faulting on x86_32
Peter Zijlstra (1):
x86/entry: Fix AC assertion
Thomas Gleixner (1):
x86/entry: Unbreak 32bit fast syscall
Vamshi K Sthambamkadi (1):
tracing/kprobes, x86/ptrace: Fix regs argument order for i386
arch/x86/entry/common.c | 29 +++++++++-----
arch/x86/include/asm/entry-common.h | 12 +++++-
arch/x86/include/asm/ptrace.h | 2 +-
arch/x86/kernel/traps.c | 65 +++++++++++++++----------------
arch/x86/lib/Makefile | 2 +-
arch/x86/mm/fault.c | 78 +++++++++++++++++++++++++++++++++++++
arch/x86/mm/numa_emulation.c | 2 +-
include/linux/entry-common.h | 51 +++++++++++++++++++-----
kernel/entry/common.c | 35 ++++++++++++++---
9 files changed, 213 insertions(+), 63 deletions(-)
diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index 48512c7944e7..2f84c7ca74ea 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -60,16 +60,10 @@ __visible noinstr void do_syscall_64(unsigned long nr, struct pt_regs *regs)
#if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION)
static __always_inline unsigned int syscall_32_enter(struct pt_regs *regs)
{
- unsigned int nr = (unsigned int)regs->orig_ax;
-
if (IS_ENABLED(CONFIG_IA32_EMULATION))
current_thread_info()->status |= TS_COMPAT;
- /*
- * Subtlety here: if ptrace pokes something larger than 2^32-1 into
- * orig_ax, the unsigned int return value truncates it. This may
- * or may not be necessary, but it matches the old asm behavior.
- */
- return (unsigned int)syscall_enter_from_user_mode(regs, nr);
+
+ return (unsigned int)regs->orig_ax;
}
/*
@@ -91,15 +85,29 @@ __visible noinstr void do_int80_syscall_32(struct pt_regs *regs)
{
unsigned int nr = syscall_32_enter(regs);
+ /*
+ * Subtlety here: if ptrace pokes something larger than 2^32-1 into
+ * orig_ax, the unsigned int return value truncates it. This may
+ * or may not be necessary, but it matches the old asm behavior.
+ */
+ nr = (unsigned int)syscall_enter_from_user_mode(regs, nr);
+
do_syscall_32_irqs_on(regs, nr);
syscall_exit_to_user_mode(regs);
}
static noinstr bool __do_fast_syscall_32(struct pt_regs *regs)
{
- unsigned int nr = syscall_32_enter(regs);
+ unsigned int nr = syscall_32_enter(regs);
int res;
+ /*
+ * This cannot use syscall_enter_from_user_mode() as it has to
+ * fetch EBP before invoking any of the syscall entry work
+ * functions.
+ */
+ syscall_enter_from_user_mode_prepare(regs);
+
instrumentation_begin();
/* Fetch EBP from where the vDSO stashed it. */
if (IS_ENABLED(CONFIG_X86_64)) {
@@ -122,6 +130,9 @@ static noinstr bool __do_fast_syscall_32(struct pt_regs *regs)
return false;
}
+ /* The case truncates any ptrace induced syscall nr > 2^32 -1 */
+ nr = (unsigned int)syscall_enter_from_user_mode_work(regs, nr);
+
/* Now this is just like a normal syscall. */
do_syscall_32_irqs_on(regs, nr);
syscall_exit_to_user_mode(regs);
diff --git a/arch/x86/include/asm/entry-common.h b/arch/x86/include/asm/entry-common.h
index a8f9315b9eae..6fe54b2813c1 100644
--- a/arch/x86/include/asm/entry-common.h
+++ b/arch/x86/include/asm/entry-common.h
@@ -18,8 +18,16 @@ static __always_inline void arch_check_user_regs(struct pt_regs *regs)
* state, not the interrupt state as imagined by Xen.
*/
unsigned long flags = native_save_fl();
- WARN_ON_ONCE(flags & (X86_EFLAGS_AC | X86_EFLAGS_DF |
- X86_EFLAGS_NT));
+ unsigned long mask = X86_EFLAGS_DF | X86_EFLAGS_NT;
+
+ /*
+ * For !SMAP hardware we patch out CLAC on entry.
+ */
+ if (boot_cpu_has(X86_FEATURE_SMAP) ||
+ (IS_ENABLED(CONFIG_64_BIT) && boot_cpu_has(X86_FEATURE_XENPV)))
+ mask |= X86_EFLAGS_AC;
+
+ WARN_ON_ONCE(flags & mask);
/* We think we came from user mode. Make sure pt_regs agrees. */
WARN_ON_ONCE(!user_mode(regs));
diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h
index 40aa69d04862..d8324a236696 100644
--- a/arch/x86/include/asm/ptrace.h
+++ b/arch/x86/include/asm/ptrace.h
@@ -327,8 +327,8 @@ static inline unsigned long regs_get_kernel_argument(struct pt_regs *regs,
static const unsigned int argument_offs[] = {
#ifdef __i386__
offsetof(struct pt_regs, ax),
- offsetof(struct pt_regs, cx),
offsetof(struct pt_regs, dx),
+ offsetof(struct pt_regs, cx),
#define NR_REG_ARGUMENTS 3
#else
offsetof(struct pt_regs, di),
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 1f66d2d1e998..81a2fb711091 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -729,20 +729,9 @@ static bool is_sysenter_singlestep(struct pt_regs *regs)
#endif
}
-static __always_inline void debug_enter(unsigned long *dr6, unsigned long *dr7)
+static __always_inline unsigned long debug_read_clear_dr6(void)
{
- /*
- * Disable breakpoints during exception handling; recursive exceptions
- * are exceedingly 'fun'.
- *
- * Since this function is NOKPROBE, and that also applies to
- * HW_BREAKPOINT_X, we can't hit a breakpoint before this (XXX except a
- * HW_BREAKPOINT_W on our stack)
- *
- * Entry text is excluded for HW_BP_X and cpu_entry_area, which
- * includes the entry stack is excluded for everything.
- */
- *dr7 = local_db_save();
+ unsigned long dr6;
/*
* The Intel SDM says:
@@ -755,15 +744,12 @@ static __always_inline void debug_enter(unsigned long *dr6, unsigned long *dr7)
*
* Keep it simple: clear DR6 immediately.
*/
- get_debugreg(*dr6, 6);
+ get_debugreg(dr6, 6);
set_debugreg(0, 6);
/* Filter out all the reserved bits which are preset to 1 */
- *dr6 &= ~DR6_RESERVED;
-}
+ dr6 &= ~DR6_RESERVED;
-static __always_inline void debug_exit(unsigned long dr7)
-{
- local_db_restore(dr7);
+ return dr6;
}
/*
@@ -863,6 +849,18 @@ static void handle_debug(struct pt_regs *regs, unsigned long dr6, bool user)
static __always_inline void exc_debug_kernel(struct pt_regs *regs,
unsigned long dr6)
{
+ /*
+ * Disable breakpoints during exception handling; recursive exceptions
+ * are exceedingly 'fun'.
+ *
+ * Since this function is NOKPROBE, and that also applies to
+ * HW_BREAKPOINT_X, we can't hit a breakpoint before this (XXX except a
+ * HW_BREAKPOINT_W on our stack)
+ *
+ * Entry text is excluded for HW_BP_X and cpu_entry_area, which
+ * includes the entry stack is excluded for everything.
+ */
+ unsigned long dr7 = local_db_save();
bool irq_state = idtentry_enter_nmi(regs);
instrumentation_begin();
@@ -883,6 +881,8 @@ static __always_inline void exc_debug_kernel(struct pt_regs *regs,
instrumentation_end();
idtentry_exit_nmi(regs, irq_state);
+
+ local_db_restore(dr7);
}
static __always_inline void exc_debug_user(struct pt_regs *regs,
@@ -894,6 +894,15 @@ static __always_inline void exc_debug_user(struct pt_regs *regs,
*/
WARN_ON_ONCE(!user_mode(regs));
+ /*
+ * NB: We can't easily clear DR7 here because
+ * idtentry_exit_to_usermode() can invoke ptrace, schedule, access
+ * user memory, etc. This means that a recursive #DB is possible. If
+ * this happens, that #DB will hit exc_debug_kernel() and clear DR7.
+ * Since we're not on the IST stack right now, everything will be
+ * fine.
+ */
+
irqentry_enter_from_user_mode(regs);
instrumentation_begin();
@@ -907,36 +916,24 @@ static __always_inline void exc_debug_user(struct pt_regs *regs,
/* IST stack entry */
DEFINE_IDTENTRY_DEBUG(exc_debug)
{
- unsigned long dr6, dr7;
-
- debug_enter(&dr6, &dr7);
- exc_debug_kernel(regs, dr6);
- debug_exit(dr7);
+ exc_debug_kernel(regs, debug_read_clear_dr6());
}
/* User entry, runs on regular task stack */
DEFINE_IDTENTRY_DEBUG_USER(exc_debug)
{
- unsigned long dr6, dr7;
-
- debug_enter(&dr6, &dr7);
- exc_debug_user(regs, dr6);
- debug_exit(dr7);
+ exc_debug_user(regs, debug_read_clear_dr6());
}
#else
/* 32 bit does not have separate entry points. */
DEFINE_IDTENTRY_RAW(exc_debug)
{
- unsigned long dr6, dr7;
-
- debug_enter(&dr6, &dr7);
+ unsigned long dr6 = debug_read_clear_dr6();
if (user_mode(regs))
exc_debug_user(regs, dr6);
else
exc_debug_kernel(regs, dr6);
-
- debug_exit(dr7);
}
#endif
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
index d46fff11f06f..aa067859a70b 100644
--- a/arch/x86/lib/Makefile
+++ b/arch/x86/lib/Makefile
@@ -24,7 +24,7 @@ ifdef CONFIG_FUNCTION_TRACER
CFLAGS_REMOVE_cmdline.o = -pg
endif
-CFLAGS_cmdline.o := -fno-stack-protector
+CFLAGS_cmdline.o := -fno-stack-protector -fno-jump-tables
endif
inat_tables_script = $(srctree)/arch/x86/tools/gen-insn-attr-x86.awk
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 35f1498e9832..6e3e8a124903 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -190,6 +190,53 @@ static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address)
return pmd_k;
}
+/*
+ * Handle a fault on the vmalloc or module mapping area
+ *
+ * This is needed because there is a race condition between the time
+ * when the vmalloc mapping code updates the PMD to the point in time
+ * where it synchronizes this update with the other page-tables in the
+ * system.
+ *
+ * In this race window another thread/CPU can map an area on the same
+ * PMD, finds it already present and does not synchronize it with the
+ * rest of the system yet. As a result v[mz]alloc might return areas
+ * which are not mapped in every page-table in the system, causing an
+ * unhandled page-fault when they are accessed.
+ */
+static noinline int vmalloc_fault(unsigned long address)
+{
+ unsigned long pgd_paddr;
+ pmd_t *pmd_k;
+ pte_t *pte_k;
+
+ /* Make sure we are in vmalloc area: */
+ if (!(address >= VMALLOC_START && address < VMALLOC_END))
+ return -1;
+
+ /*
+ * Synchronize this task's top level page-table
+ * with the 'reference' page table.
+ *
+ * Do _not_ use "current" here. We might be inside
+ * an interrupt in the middle of a task switch..
+ */
+ pgd_paddr = read_cr3_pa();
+ pmd_k = vmalloc_sync_one(__va(pgd_paddr), address);
+ if (!pmd_k)
+ return -1;
+
+ if (pmd_large(*pmd_k))
+ return 0;
+
+ pte_k = pte_offset_kernel(pmd_k, address);
+ if (!pte_present(*pte_k))
+ return -1;
+
+ return 0;
+}
+NOKPROBE_SYMBOL(vmalloc_fault);
+
void arch_sync_kernel_mappings(unsigned long start, unsigned long end)
{
unsigned long addr;
@@ -1110,6 +1157,37 @@ do_kern_addr_fault(struct pt_regs *regs, unsigned long hw_error_code,
*/
WARN_ON_ONCE(hw_error_code & X86_PF_PK);
+#ifdef CONFIG_X86_32
+ /*
+ * We can fault-in kernel-space virtual memory on-demand. The
+ * 'reference' page table is init_mm.pgd.
+ *
+ * NOTE! We MUST NOT take any locks for this case. We may
+ * be in an interrupt or a critical region, and should
+ * only copy the information from the master page table,
+ * nothing more.
+ *
+ * Before doing this on-demand faulting, ensure that the
+ * fault is not any of the following:
+ * 1. A fault on a PTE with a reserved bit set.
+ * 2. A fault caused by a user-mode access. (Do not demand-
+ * fault kernel memory due to user-mode accesses).
+ * 3. A fault caused by a page-level protection violation.
+ * (A demand fault would be on a non-present page which
+ * would have X86_PF_PROT==0).
+ *
+ * This is only needed to close a race condition on x86-32 in
+ * the vmalloc mapping/unmapping code. See the comment above
+ * vmalloc_fault() for details. On x86-64 the race does not
+ * exist as the vmalloc mappings don't need to be synchronized
+ * there.
+ */
+ if (!(hw_error_code & (X86_PF_RSVD | X86_PF_USER | X86_PF_PROT))) {
+ if (vmalloc_fault(address) >= 0)
+ return;
+ }
+#endif
+
/* Was the fault spurious, caused by lazy TLB invalidation? */
if (spurious_kernel_fault(hw_error_code, address))
return;
diff --git a/arch/x86/mm/numa_emulation.c b/arch/x86/mm/numa_emulation.c
index c5174b4e318b..683cd12f4793 100644
--- a/arch/x86/mm/numa_emulation.c
+++ b/arch/x86/mm/numa_emulation.c
@@ -321,7 +321,7 @@ static int __init split_nodes_size_interleave(struct numa_meminfo *ei,
u64 addr, u64 max_addr, u64 size)
{
return split_nodes_size_interleave_uniform(ei, pi, addr, max_addr, size,
- 0, NULL, NUMA_NO_NODE);
+ 0, NULL, 0);
}
static int __init setup_emu2phys_nid(int *dfl_phys_nid)
diff --git a/include/linux/entry-common.h b/include/linux/entry-common.h
index efebbffcd5cc..159c7476b11b 100644
--- a/include/linux/entry-common.h
+++ b/include/linux/entry-common.h
@@ -110,15 +110,30 @@ static inline __must_check int arch_syscall_enter_tracehook(struct pt_regs *regs
#endif
/**
- * syscall_enter_from_user_mode - Check and handle work before invoking
- * a syscall
+ * syscall_enter_from_user_mode_prepare - Establish state and enable interrupts
* @regs: Pointer to currents pt_regs
- * @syscall: The syscall number
*
* Invoked from architecture specific syscall entry code with interrupts
* disabled. The calling code has to be non-instrumentable. When the
- * function returns all state is correct and the subsequent functions can be
- * instrumented.
+ * function returns all state is correct, interrupts are enabled and the
+ * subsequent functions can be instrumented.
+ *
+ * This handles lockdep, RCU (context tracking) and tracing state.
+ *
+ * This is invoked when there is extra architecture specific functionality
+ * to be done between establishing state and handling user mode entry work.
+ */
+void syscall_enter_from_user_mode_prepare(struct pt_regs *regs);
+
+/**
+ * syscall_enter_from_user_mode_work - Check and handle work before invoking
+ * a syscall
+ * @regs: Pointer to currents pt_regs
+ * @syscall: The syscall number
+ *
+ * Invoked from architecture specific syscall entry code with interrupts
+ * enabled after invoking syscall_enter_from_user_mode_prepare() and extra
+ * architecture specific work.
*
* Returns: The original or a modified syscall number
*
@@ -127,12 +142,30 @@ static inline __must_check int arch_syscall_enter_tracehook(struct pt_regs *regs
* syscall_set_return_value() first. If neither of those are called and -1
* is returned, then the syscall will fail with ENOSYS.
*
- * The following functionality is handled here:
+ * It handles the following work items:
*
- * 1) Establish state (lockdep, RCU (context tracking), tracing)
- * 2) TIF flag dependent invocations of arch_syscall_enter_tracehook(),
+ * 1) TIF flag dependent invocations of arch_syscall_enter_tracehook(),
* __secure_computing(), trace_sys_enter()
- * 3) Invocation of audit_syscall_entry()
+ * 2) Invocation of audit_syscall_entry()
+ */
+long syscall_enter_from_user_mode_work(struct pt_regs *regs, long syscall);
+
+/**
+ * syscall_enter_from_user_mode - Establish state and check and handle work
+ * before invoking a syscall
+ * @regs: Pointer to currents pt_regs
+ * @syscall: The syscall number
+ *
+ * Invoked from architecture specific syscall entry code with interrupts
+ * disabled. The calling code has to be non-instrumentable. When the
+ * function returns all state is correct, interrupts are enabled and the
+ * subsequent functions can be instrumented.
+ *
+ * This is combination of syscall_enter_from_user_mode_prepare() and
+ * syscall_enter_from_user_mode_work().
+ *
+ * Returns: The original or a modified syscall number. See
+ * syscall_enter_from_user_mode_work() for further explanation.
*/
long syscall_enter_from_user_mode(struct pt_regs *regs, long syscall);
diff --git a/kernel/entry/common.c b/kernel/entry/common.c
index fcae019158ca..18683598edbc 100644
--- a/kernel/entry/common.c
+++ b/kernel/entry/common.c
@@ -69,22 +69,45 @@ static long syscall_trace_enter(struct pt_regs *regs, long syscall,
return ret ? : syscall_get_nr(current, regs);
}
-noinstr long syscall_enter_from_user_mode(struct pt_regs *regs, long syscall)
+static __always_inline long
+__syscall_enter_from_user_work(struct pt_regs *regs, long syscall)
{
unsigned long ti_work;
- enter_from_user_mode(regs);
- instrumentation_begin();
-
- local_irq_enable();
ti_work = READ_ONCE(current_thread_info()->flags);
if (ti_work & SYSCALL_ENTER_WORK)
syscall = syscall_trace_enter(regs, syscall, ti_work);
- instrumentation_end();
return syscall;
}
+long syscall_enter_from_user_mode_work(struct pt_regs *regs, long syscall)
+{
+ return __syscall_enter_from_user_work(regs, syscall);
+}
+
+noinstr long syscall_enter_from_user_mode(struct pt_regs *regs, long syscall)
+{
+ long ret;
+
+ enter_from_user_mode(regs);
+
+ instrumentation_begin();
+ local_irq_enable();
+ ret = __syscall_enter_from_user_work(regs, syscall);
+ instrumentation_end();
+
+ return ret;
+}
+
+noinstr void syscall_enter_from_user_mode_prepare(struct pt_regs *regs)
+{
+ enter_from_user_mode(regs);
+ instrumentation_begin();
+ local_irq_enable();
+ instrumentation_end();
+}
+
/**
* exit_to_user_mode - Fixup state when exiting to user mode
*
^ permalink raw reply related [flat|nested] 547+ messages in thread
* Re: [GIT PULL] x86 fixes
2020-08-15 11:45 Ingo Molnar
@ 2020-08-16 1:55 ` pr-tracker-bot
0 siblings, 0 replies; 547+ messages in thread
From: pr-tracker-bot @ 2020-08-16 1:55 UTC (permalink / raw)
To: Ingo Molnar
Cc: Linus Torvalds, linux-kernel, Thomas Gleixner, Borislav Petkov,
Peter Zijlstra, Andrew Morton
The pull request you sent on Sat, 15 Aug 2020 13:45:52 +0200:
> git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-2020-08-15
has been merged into torvalds/linux.git:
https://git.kernel.org/torvalds/c/50f6c7dbd973092d8e5f3c89f29eb4bea19fdebd
Thank you!
--
Deet-doot-dot, I am a bot.
https://korg.docs.kernel.org/prtracker.html
^ permalink raw reply [flat|nested] 547+ messages in thread
* [GIT PULL] x86 fixes
@ 2020-08-15 11:45 Ingo Molnar
2020-08-16 1:55 ` pr-tracker-bot
0 siblings, 1 reply; 547+ messages in thread
From: Ingo Molnar @ 2020-08-15 11:45 UTC (permalink / raw)
To: Linus Torvalds
Cc: linux-kernel, Thomas Gleixner, Borislav Petkov, Peter Zijlstra,
Andrew Morton
Linus,
Please pull the latest x86/urgent git tree from:
git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-2020-08-15
# HEAD: a6d996cbd38b42341ad3fce74506b9fdc280e395 x86/alternatives: Acquire pte lock with interrupts enabled
Misc fixes and small updates all around the place:
- Fix mitigation state sysfs output
- Fix an FPU xstate/sxave code assumption bug triggered by Architectural LBR support
- Fix Lightning Mountain SoC TSC frequency enumeration bug
- Fix kexec debug output
- Fix kexec memory range assumption bug
- Fix a boundary condition in the crash kernel code
- Optimize porgatory.ro generation a bit
- Enable ACRN guests to use X2APIC mode
- Reduce a __text_poke() IRQs-off critical section for the benefit of PREEMPT_RT
Thanks,
Ingo
------------------>
Dilip Kota (1):
x86/tsr: Fix tsc frequency enumeration bug on Lightning Mountain SoC
Kan Liang (1):
x86/fpu/xstate: Fix an xstate size check warning with architectural LBRs
Lianbo Jiang (3):
x86/crash: Correct the address boundary of function parameters
kexec: Improve & fix crash_exclude_mem_range() to handle overlapping ranges
kexec_file: Correctly output debugging information for the PT_LOAD ELF header
Pawan Gupta (1):
x86/bugs/multihit: Fix mitigation reporting when VMX is not in use
Pingfan Liu (1):
x86/purgatory: Don't generate debug info for purgatory.ro
Sebastian Andrzej Siewior (1):
x86/alternatives: Acquire pte lock with interrupts enabled
Shuo Liu (2):
x86/acrn: Allow ACRN guest to use X2APIC mode
x86/acrn: Remove redundant chars from ACRN signature
Documentation/admin-guide/hw-vuln/multihit.rst | 4 +++
arch/x86/kernel/alternative.c | 6 ++--
arch/x86/kernel/cpu/acrn.c | 12 +++-----
arch/x86/kernel/cpu/bugs.c | 8 ++++-
arch/x86/kernel/crash.c | 2 +-
arch/x86/kernel/fpu/xstate.c | 33 ++++++++++++++++++++-
arch/x86/kernel/tsc_msr.c | 9 ++++--
arch/x86/purgatory/Makefile | 5 +++-
kernel/kexec_file.c | 41 ++++++++++++++++----------
9 files changed, 88 insertions(+), 32 deletions(-)
diff --git a/Documentation/admin-guide/hw-vuln/multihit.rst b/Documentation/admin-guide/hw-vuln/multihit.rst
index ba9988d8bce5..140e4cec38c3 100644
--- a/Documentation/admin-guide/hw-vuln/multihit.rst
+++ b/Documentation/admin-guide/hw-vuln/multihit.rst
@@ -80,6 +80,10 @@ The possible values in this file are:
- The processor is not vulnerable.
* - KVM: Mitigation: Split huge pages
- Software changes mitigate this issue.
+ * - KVM: Mitigation: VMX unsupported
+ - KVM is not vulnerable because Virtual Machine Extensions (VMX) is not supported.
+ * - KVM: Mitigation: VMX disabled
+ - KVM is not vulnerable because Virtual Machine Extensions (VMX) is disabled.
* - KVM: Vulnerable
- The processor is vulnerable, but no mitigation enabled
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index c826cddae157..34a1b8562c31 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -874,8 +874,6 @@ static void *__text_poke(void *addr, const void *opcode, size_t len)
*/
BUG_ON(!pages[0] || (cross_page_boundary && !pages[1]));
- local_irq_save(flags);
-
/*
* Map the page without the global bit, as TLB flushing is done with
* flush_tlb_mm_range(), which is intended for non-global PTEs.
@@ -892,6 +890,8 @@ static void *__text_poke(void *addr, const void *opcode, size_t len)
*/
VM_BUG_ON(!ptep);
+ local_irq_save(flags);
+
pte = mk_pte(pages[0], pgprot);
set_pte_at(poking_mm, poking_addr, ptep, pte);
@@ -941,8 +941,8 @@ static void *__text_poke(void *addr, const void *opcode, size_t len)
*/
BUG_ON(memcmp(addr, opcode, len));
- pte_unmap_unlock(ptep, ptl);
local_irq_restore(flags);
+ pte_unmap_unlock(ptep, ptl);
return addr;
}
diff --git a/arch/x86/kernel/cpu/acrn.c b/arch/x86/kernel/cpu/acrn.c
index 1da9b1c9a2db..0b2c03943ac6 100644
--- a/arch/x86/kernel/cpu/acrn.c
+++ b/arch/x86/kernel/cpu/acrn.c
@@ -11,14 +11,15 @@
#include <linux/interrupt.h>
#include <asm/apic.h>
+#include <asm/cpufeatures.h>
#include <asm/desc.h>
#include <asm/hypervisor.h>
#include <asm/idtentry.h>
#include <asm/irq_regs.h>
-static uint32_t __init acrn_detect(void)
+static u32 __init acrn_detect(void)
{
- return hypervisor_cpuid_base("ACRNACRNACRN\0\0", 0);
+ return hypervisor_cpuid_base("ACRNACRNACRN", 0);
}
static void __init acrn_init_platform(void)
@@ -29,12 +30,7 @@ static void __init acrn_init_platform(void)
static bool acrn_x2apic_available(void)
{
- /*
- * x2apic is not supported for now. Future enablement will have to check
- * X86_FEATURE_X2APIC to determine whether x2apic is supported in the
- * guest.
- */
- return false;
+ return boot_cpu_has(X86_FEATURE_X2APIC);
}
static void (*acrn_intr_handler)(void);
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index f0b743a2fe9c..d3f0db463f96 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -31,6 +31,7 @@
#include <asm/intel-family.h>
#include <asm/e820/api.h>
#include <asm/hypervisor.h>
+#include <asm/tlbflush.h>
#include "cpu.h"
@@ -1549,7 +1550,12 @@ static ssize_t l1tf_show_state(char *buf)
static ssize_t itlb_multihit_show_state(char *buf)
{
- if (itlb_multihit_kvm_mitigation)
+ if (!boot_cpu_has(X86_FEATURE_MSR_IA32_FEAT_CTL) ||
+ !boot_cpu_has(X86_FEATURE_VMX))
+ return sprintf(buf, "KVM: Mitigation: VMX unsupported\n");
+ else if (!(cr4_read_shadow() & X86_CR4_VMXE))
+ return sprintf(buf, "KVM: Mitigation: VMX disabled\n");
+ else if (itlb_multihit_kvm_mitigation)
return sprintf(buf, "KVM: Mitigation: Split huge pages\n");
else
return sprintf(buf, "KVM: Vulnerable\n");
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index fd87b59452a3..a8f3af257e26 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -230,7 +230,7 @@ static int elf_header_exclude_ranges(struct crash_mem *cmem)
int ret = 0;
/* Exclude the low 1M because it is always reserved */
- ret = crash_exclude_mem_range(cmem, 0, 1<<20);
+ ret = crash_exclude_mem_range(cmem, 0, (1<<20)-1);
if (ret)
return ret;
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index be2a68a09d19..6073e342a1ed 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -611,6 +611,10 @@ static void check_xstate_against_struct(int nr)
* This essentially double-checks what the cpu told us about
* how large the XSAVE buffer needs to be. We are recalculating
* it to be safe.
+ *
+ * Dynamic XSAVE features allocate their own buffers and are not
+ * covered by these checks. Only the size of the buffer for task->fpu
+ * is checked here.
*/
static void do_extra_xstate_size_checks(void)
{
@@ -673,6 +677,33 @@ static unsigned int __init get_xsaves_size(void)
return ebx;
}
+/*
+ * Get the total size of the enabled xstates without the dynamic supervisor
+ * features.
+ */
+static unsigned int __init get_xsaves_size_no_dynamic(void)
+{
+ u64 mask = xfeatures_mask_dynamic();
+ unsigned int size;
+
+ if (!mask)
+ return get_xsaves_size();
+
+ /* Disable dynamic features. */
+ wrmsrl(MSR_IA32_XSS, xfeatures_mask_supervisor());
+
+ /*
+ * Ask the hardware what size is required of the buffer.
+ * This is the size required for the task->fpu buffer.
+ */
+ size = get_xsaves_size();
+
+ /* Re-enable dynamic features so XSAVES will work on them again. */
+ wrmsrl(MSR_IA32_XSS, xfeatures_mask_supervisor() | mask);
+
+ return size;
+}
+
static unsigned int __init get_xsave_size(void)
{
unsigned int eax, ebx, ecx, edx;
@@ -710,7 +741,7 @@ static int __init init_xstate_size(void)
xsave_size = get_xsave_size();
if (boot_cpu_has(X86_FEATURE_XSAVES))
- possible_xstate_size = get_xsaves_size();
+ possible_xstate_size = get_xsaves_size_no_dynamic();
else
possible_xstate_size = xsave_size;
diff --git a/arch/x86/kernel/tsc_msr.c b/arch/x86/kernel/tsc_msr.c
index 4fec6f3a1858..a654a9b4b77c 100644
--- a/arch/x86/kernel/tsc_msr.c
+++ b/arch/x86/kernel/tsc_msr.c
@@ -133,10 +133,15 @@ static const struct freq_desc freq_desc_ann = {
.mask = 0x0f,
};
-/* 24 MHz crystal? : 24 * 13 / 4 = 78 MHz */
+/*
+ * 24 MHz crystal? : 24 * 13 / 4 = 78 MHz
+ * Frequency step for Lightning Mountain SoC is fixed to 78 MHz,
+ * so all the frequency entries are 78000.
+ */
static const struct freq_desc freq_desc_lgm = {
.use_msr_plat = true,
- .freqs = { 78000, 78000, 78000, 78000, 78000, 78000, 78000, 78000 },
+ .freqs = { 78000, 78000, 78000, 78000, 78000, 78000, 78000, 78000,
+ 78000, 78000, 78000, 78000, 78000, 78000, 78000, 78000 },
.mask = 0x0f,
};
diff --git a/arch/x86/purgatory/Makefile b/arch/x86/purgatory/Makefile
index 088bd764e0b7..d24b43a4451a 100644
--- a/arch/x86/purgatory/Makefile
+++ b/arch/x86/purgatory/Makefile
@@ -32,7 +32,7 @@ KCOV_INSTRUMENT := n
# make up the standalone purgatory.ro
PURGATORY_CFLAGS_REMOVE := -mcmodel=kernel
-PURGATORY_CFLAGS := -mcmodel=large -ffreestanding -fno-zero-initialized-in-bss
+PURGATORY_CFLAGS := -mcmodel=large -ffreestanding -fno-zero-initialized-in-bss -g0
PURGATORY_CFLAGS += $(DISABLE_STACKLEAK_PLUGIN) -DDISABLE_BRANCH_PROFILING
PURGATORY_CFLAGS += $(call cc-option,-fno-stack-protector)
@@ -64,6 +64,9 @@ CFLAGS_sha256.o += $(PURGATORY_CFLAGS)
CFLAGS_REMOVE_string.o += $(PURGATORY_CFLAGS_REMOVE)
CFLAGS_string.o += $(PURGATORY_CFLAGS)
+AFLAGS_REMOVE_setup-x86_$(BITS).o += -Wa,-gdwarf-2
+AFLAGS_REMOVE_entry64.o += -Wa,-gdwarf-2
+
$(obj)/purgatory.ro: $(PURGATORY_OBJS) FORCE
$(call if_changed,ld)
diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c
index 94661d2d13ad..3f7867c1820f 100644
--- a/kernel/kexec_file.c
+++ b/kernel/kexec_file.c
@@ -1157,24 +1157,26 @@ int crash_exclude_mem_range(struct crash_mem *mem,
unsigned long long mstart, unsigned long long mend)
{
int i, j;
- unsigned long long start, end;
+ unsigned long long start, end, p_start, p_end;
struct crash_mem_range temp_range = {0, 0};
for (i = 0; i < mem->nr_ranges; i++) {
start = mem->ranges[i].start;
end = mem->ranges[i].end;
+ p_start = mstart;
+ p_end = mend;
if (mstart > end || mend < start)
continue;
/* Truncate any area outside of range */
if (mstart < start)
- mstart = start;
+ p_start = start;
if (mend > end)
- mend = end;
+ p_end = end;
/* Found completely overlapping range */
- if (mstart == start && mend == end) {
+ if (p_start == start && p_end == end) {
mem->ranges[i].start = 0;
mem->ranges[i].end = 0;
if (i < mem->nr_ranges - 1) {
@@ -1185,20 +1187,29 @@ int crash_exclude_mem_range(struct crash_mem *mem,
mem->ranges[j].end =
mem->ranges[j+1].end;
}
+
+ /*
+ * Continue to check if there are another overlapping ranges
+ * from the current position because of shifting the above
+ * mem ranges.
+ */
+ i--;
+ mem->nr_ranges--;
+ continue;
}
mem->nr_ranges--;
return 0;
}
- if (mstart > start && mend < end) {
+ if (p_start > start && p_end < end) {
/* Split original range */
- mem->ranges[i].end = mstart - 1;
- temp_range.start = mend + 1;
+ mem->ranges[i].end = p_start - 1;
+ temp_range.start = p_end + 1;
temp_range.end = end;
- } else if (mstart != start)
- mem->ranges[i].end = mstart - 1;
+ } else if (p_start != start)
+ mem->ranges[i].end = p_start - 1;
else
- mem->ranges[i].start = mend + 1;
+ mem->ranges[i].start = p_end + 1;
break;
}
@@ -1235,7 +1246,7 @@ int crash_prepare_elf64_headers(struct crash_mem *mem, int kernel_map,
unsigned long long notes_addr;
unsigned long mstart, mend;
- /* extra phdr for vmcoreinfo elf note */
+ /* extra phdr for vmcoreinfo ELF note */
nr_phdr = nr_cpus + 1;
nr_phdr += mem->nr_ranges;
@@ -1243,7 +1254,7 @@ int crash_prepare_elf64_headers(struct crash_mem *mem, int kernel_map,
* kexec-tools creates an extra PT_LOAD phdr for kernel text mapping
* area (for example, ffffffff80000000 - ffffffffa0000000 on x86_64).
* I think this is required by tools like gdb. So same physical
- * memory will be mapped in two elf headers. One will contain kernel
+ * memory will be mapped in two ELF headers. One will contain kernel
* text virtual addresses and other will have __va(physical) addresses.
*/
@@ -1270,7 +1281,7 @@ int crash_prepare_elf64_headers(struct crash_mem *mem, int kernel_map,
ehdr->e_ehsize = sizeof(Elf64_Ehdr);
ehdr->e_phentsize = sizeof(Elf64_Phdr);
- /* Prepare one phdr of type PT_NOTE for each present cpu */
+ /* Prepare one phdr of type PT_NOTE for each present CPU */
for_each_present_cpu(cpu) {
phdr->p_type = PT_NOTE;
notes_addr = per_cpu_ptr_to_phys(per_cpu_ptr(crash_notes, cpu));
@@ -1312,10 +1323,10 @@ int crash_prepare_elf64_headers(struct crash_mem *mem, int kernel_map,
phdr->p_filesz = phdr->p_memsz = mend - mstart + 1;
phdr->p_align = 0;
ehdr->e_phnum++;
- phdr++;
- pr_debug("Crash PT_LOAD elf header. phdr=%p vaddr=0x%llx, paddr=0x%llx, sz=0x%llx e_phnum=%d p_offset=0x%llx\n",
+ pr_debug("Crash PT_LOAD ELF header. phdr=%p vaddr=0x%llx, paddr=0x%llx, sz=0x%llx e_phnum=%d p_offset=0x%llx\n",
phdr, phdr->p_vaddr, phdr->p_paddr, phdr->p_filesz,
ehdr->e_phnum, phdr->p_offset);
+ phdr++;
}
*addr = buf;
^ permalink raw reply related [flat|nested] 547+ messages in thread
* Re: [GIT PULL] x86 fixes
2020-07-25 11:46 Ingo Molnar
@ 2020-07-25 22:30 ` pr-tracker-bot
0 siblings, 0 replies; 547+ messages in thread
From: pr-tracker-bot @ 2020-07-25 22:30 UTC (permalink / raw)
To: Ingo Molnar
Cc: Linus Torvalds, linux-kernel, Thomas Gleixner, Borislav Petkov,
Peter Zijlstra
The pull request you sent on Sat, 25 Jul 2020 13:46:37 +0200:
> git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-2020-07-25
has been merged into torvalds/linux.git:
https://git.kernel.org/torvalds/c/fbe0d451bcea569fc0ed3455511a90646c8a9c81
Thank you!
--
Deet-doot-dot, I am a bot.
https://korg.wiki.kernel.org/userdoc/prtracker
^ permalink raw reply [flat|nested] 547+ messages in thread
* [GIT PULL] x86 fixes
@ 2020-07-25 11:46 Ingo Molnar
2020-07-25 22:30 ` pr-tracker-bot
0 siblings, 1 reply; 547+ messages in thread
From: Ingo Molnar @ 2020-07-25 11:46 UTC (permalink / raw)
To: Linus Torvalds
Cc: linux-kernel, Thomas Gleixner, Borislav Petkov, Peter Zijlstra
Linus,
Please pull the latest x86/urgent git tree from:
git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-2020-07-25
# HEAD: d181d2da0141371bbc360eaea78719203e165e1c x86/dumpstack: Dump user space code correctly again
Misc fixes:
- Fix a section end page alignment assumption that was causing crashes
- Fix ORC unwinding on freshly forked tasks which haven't executed yet
and which have empty user task stacks
- Fix the debug.exception-trace=1 sysctl dumping of user stacks, which
was broken by recent maccess changes
out-of-topic modifications in x86-urgent-2020-07-25:
------------------------------------------------------
include/asm-generic/vmlinux.lds.h # de2b41be8fcc: x86, vmlinux.lds: Page-align
Thanks,
Ingo
------------------>
Joerg Roedel (1):
x86, vmlinux.lds: Page-align end of ..page_aligned sections
Josh Poimboeuf (2):
x86/unwind/orc: Fix ORC for newly forked tasks
x86/stacktrace: Fix reliable check for empty user task stacks
Thomas Gleixner (1):
x86/dumpstack: Dump user space code correctly again
arch/x86/kernel/dumpstack.c | 27 +++++++++++++++++----------
arch/x86/kernel/stacktrace.c | 5 -----
arch/x86/kernel/unwind_orc.c | 8 ++++++--
arch/x86/kernel/vmlinux.lds.S | 1 +
include/asm-generic/vmlinux.lds.h | 5 ++++-
5 files changed, 28 insertions(+), 18 deletions(-)
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index b037cfa7c0c5..7401cc12c3cc 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -71,6 +71,22 @@ static void printk_stack_address(unsigned long address, int reliable,
printk("%s %s%pB\n", log_lvl, reliable ? "" : "? ", (void *)address);
}
+static int copy_code(struct pt_regs *regs, u8 *buf, unsigned long src,
+ unsigned int nbytes)
+{
+ if (!user_mode(regs))
+ return copy_from_kernel_nofault(buf, (u8 *)src, nbytes);
+
+ /*
+ * Make sure userspace isn't trying to trick us into dumping kernel
+ * memory by pointing the userspace instruction pointer at it.
+ */
+ if (__chk_range_not_ok(src, nbytes, TASK_SIZE_MAX))
+ return -EINVAL;
+
+ return copy_from_user_nmi(buf, (void __user *)src, nbytes);
+}
+
/*
* There are a couple of reasons for the 2/3rd prologue, courtesy of Linus:
*
@@ -97,17 +113,8 @@ void show_opcodes(struct pt_regs *regs, const char *loglvl)
#define OPCODE_BUFSIZE (PROLOGUE_SIZE + 1 + EPILOGUE_SIZE)
u8 opcodes[OPCODE_BUFSIZE];
unsigned long prologue = regs->ip - PROLOGUE_SIZE;
- bool bad_ip;
-
- /*
- * Make sure userspace isn't trying to trick us into dumping kernel
- * memory by pointing the userspace instruction pointer at it.
- */
- bad_ip = user_mode(regs) &&
- __chk_range_not_ok(prologue, OPCODE_BUFSIZE, TASK_SIZE_MAX);
- if (bad_ip || copy_from_kernel_nofault(opcodes, (u8 *)prologue,
- OPCODE_BUFSIZE)) {
+ if (copy_code(regs, opcodes, prologue, sizeof(opcodes))) {
printk("%sCode: Bad RIP value.\n", loglvl);
} else {
printk("%sCode: %" __stringify(PROLOGUE_SIZE) "ph <%02x> %"
diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c
index 6ad43fc44556..2fd698e28e4d 100644
--- a/arch/x86/kernel/stacktrace.c
+++ b/arch/x86/kernel/stacktrace.c
@@ -58,7 +58,6 @@ int arch_stack_walk_reliable(stack_trace_consume_fn consume_entry,
* or a page fault), which can make frame pointers
* unreliable.
*/
-
if (IS_ENABLED(CONFIG_FRAME_POINTER))
return -EINVAL;
}
@@ -81,10 +80,6 @@ int arch_stack_walk_reliable(stack_trace_consume_fn consume_entry,
if (unwind_error(&state))
return -EINVAL;
- /* Success path for non-user tasks, i.e. kthreads and idle tasks */
- if (!(task->flags & (PF_KTHREAD | PF_IDLE)))
- return -EINVAL;
-
return 0;
}
diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c
index 7f969b2d240f..ec88bbe08a32 100644
--- a/arch/x86/kernel/unwind_orc.c
+++ b/arch/x86/kernel/unwind_orc.c
@@ -440,8 +440,11 @@ bool unwind_next_frame(struct unwind_state *state)
/*
* Find the orc_entry associated with the text address.
*
- * Decrement call return addresses by one so they work for sibling
- * calls and calls to noreturn functions.
+ * For a call frame (as opposed to a signal frame), state->ip points to
+ * the instruction after the call. That instruction's stack layout
+ * could be different from the call instruction's layout, for example
+ * if the call was to a noreturn function. So get the ORC data for the
+ * call instruction itself.
*/
orc = orc_find(state->signal ? state->ip : state->ip - 1);
if (!orc) {
@@ -662,6 +665,7 @@ void __unwind_start(struct unwind_state *state, struct task_struct *task,
state->sp = task->thread.sp;
state->bp = READ_ONCE_NOCHECK(frame->bp);
state->ip = READ_ONCE_NOCHECK(frame->ret_addr);
+ state->signal = (void *)state->ip == ret_from_fork;
}
if (get_stack_info((unsigned long *)state->sp, state->task,
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index 3bfc8dd8a43d..9a03e5b23135 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -358,6 +358,7 @@ SECTIONS
.bss : AT(ADDR(.bss) - LOAD_OFFSET) {
__bss_start = .;
*(.bss..page_aligned)
+ . = ALIGN(PAGE_SIZE);
*(BSS_MAIN)
BSS_DECRYPTED
. = ALIGN(PAGE_SIZE);
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index db600ef218d7..052e0f05a984 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -341,7 +341,8 @@
#define PAGE_ALIGNED_DATA(page_align) \
. = ALIGN(page_align); \
- *(.data..page_aligned)
+ *(.data..page_aligned) \
+ . = ALIGN(page_align);
#define READ_MOSTLY_DATA(align) \
. = ALIGN(align); \
@@ -737,7 +738,9 @@
. = ALIGN(bss_align); \
.bss : AT(ADDR(.bss) - LOAD_OFFSET) { \
BSS_FIRST_SECTIONS \
+ . = ALIGN(PAGE_SIZE); \
*(.bss..page_aligned) \
+ . = ALIGN(PAGE_SIZE); \
*(.dynbss) \
*(BSS_MAIN) \
*(COMMON) \
^ permalink raw reply related [flat|nested] 547+ messages in thread
* Re: [GIT PULL] x86 fixes
2020-03-02 8:49 Ingo Molnar
@ 2020-03-03 23:35 ` pr-tracker-bot
0 siblings, 0 replies; 547+ messages in thread
From: pr-tracker-bot @ 2020-03-03 23:35 UTC (permalink / raw)
To: Ingo Molnar
Cc: Linus Torvalds, linux-kernel, Thomas Gleixner, Borislav Petkov,
Andrew Morton, Peter Zijlstra
The pull request you sent on Mon, 2 Mar 2020 09:49:54 +0100:
> git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus
has been merged into torvalds/linux.git:
https://git.kernel.org/torvalds/c/2873dc25477f483997c99981cf14b43a0d4f84d4
Thank you!
--
Deet-doot-dot, I am a bot.
https://korg.wiki.kernel.org/userdoc/prtracker
^ permalink raw reply [flat|nested] 547+ messages in thread
* [GIT PULL] x86 fixes
@ 2020-03-02 8:49 Ingo Molnar
2020-03-03 23:35 ` pr-tracker-bot
0 siblings, 1 reply; 547+ messages in thread
From: Ingo Molnar @ 2020-03-02 8:49 UTC (permalink / raw)
To: Linus Torvalds
Cc: linux-kernel, Thomas Gleixner, Borislav Petkov, Andrew Morton,
Peter Zijlstra
Linus,
Please pull the latest x86-urgent-for-linus git tree from:
git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus
# HEAD: bba42affa732d6fd5bd5c9678e6deacde2de1547 x86/mm: Fix dump_pagetables with Xen PV
Misc fixes: a pkeys fix for a bug that triggers with weird BIOS settings,
and two Xen PV fixes: a paravirt interface fix, and pagetable dumping
fix.
Thanks,
Ingo
------------------>
Juergen Gross (2):
x86/ioperm: Add new paravirt function update_io_bitmap()
x86/mm: Fix dump_pagetables with Xen PV
Sean Christopherson (1):
x86/pkeys: Manually set X86_FEATURE_OSPKE to preserve existing changes
arch/x86/include/asm/io_bitmap.h | 9 ++++++++-
arch/x86/include/asm/paravirt.h | 7 +++++++
arch/x86/include/asm/paravirt_types.h | 4 ++++
arch/x86/kernel/cpu/common.c | 2 +-
arch/x86/kernel/paravirt.c | 5 +++++
arch/x86/kernel/process.c | 2 +-
arch/x86/mm/dump_pagetables.c | 7 +------
arch/x86/xen/enlighten_pv.c | 25 +++++++++++++++++++++++++
8 files changed, 52 insertions(+), 9 deletions(-)
diff --git a/arch/x86/include/asm/io_bitmap.h b/arch/x86/include/asm/io_bitmap.h
index 02c6ef8f7667..07344d82e88e 100644
--- a/arch/x86/include/asm/io_bitmap.h
+++ b/arch/x86/include/asm/io_bitmap.h
@@ -19,7 +19,14 @@ struct task_struct;
void io_bitmap_share(struct task_struct *tsk);
void io_bitmap_exit(void);
-void tss_update_io_bitmap(void);
+void native_tss_update_io_bitmap(void);
+
+#ifdef CONFIG_PARAVIRT_XXL
+#include <asm/paravirt.h>
+#else
+#define tss_update_io_bitmap native_tss_update_io_bitmap
+#endif
+
#else
static inline void io_bitmap_share(struct task_struct *tsk) { }
static inline void io_bitmap_exit(void) { }
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 86e7317eb31f..694d8daf4983 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -295,6 +295,13 @@ static inline void write_idt_entry(gate_desc *dt, int entry, const gate_desc *g)
PVOP_VCALL3(cpu.write_idt_entry, dt, entry, g);
}
+#ifdef CONFIG_X86_IOPL_IOPERM
+static inline void tss_update_io_bitmap(void)
+{
+ PVOP_VCALL0(cpu.update_io_bitmap);
+}
+#endif
+
static inline void paravirt_activate_mm(struct mm_struct *prev,
struct mm_struct *next)
{
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index 84812964d3dd..732f62e04ddb 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -140,6 +140,10 @@ struct pv_cpu_ops {
void (*load_sp0)(unsigned long sp0);
+#ifdef CONFIG_X86_IOPL_IOPERM
+ void (*update_io_bitmap)(void);
+#endif
+
void (*wbinvd)(void);
/* cpuid emulation, mostly so that caps bits can be disabled */
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 52c9bfbbdb2a..4cdb123ff66a 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -445,7 +445,7 @@ static __always_inline void setup_pku(struct cpuinfo_x86 *c)
* cpuid bit to be set. We need to ensure that we
* update that bit in this CPU's "cpu_info".
*/
- get_cpu_cap(c);
+ set_cpu_cap(c, X86_FEATURE_OSPKE);
}
#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 789f5e4f89de..c131ba4e70ef 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -30,6 +30,7 @@
#include <asm/timer.h>
#include <asm/special_insns.h>
#include <asm/tlb.h>
+#include <asm/io_bitmap.h>
/*
* nop stub, which must not clobber anything *including the stack* to
@@ -341,6 +342,10 @@ struct paravirt_patch_template pv_ops = {
.cpu.iret = native_iret,
.cpu.swapgs = native_swapgs,
+#ifdef CONFIG_X86_IOPL_IOPERM
+ .cpu.update_io_bitmap = native_tss_update_io_bitmap,
+#endif
+
.cpu.start_context_switch = paravirt_nop,
.cpu.end_context_switch = paravirt_nop,
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 839b5244e3b7..3053c85e0e42 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -374,7 +374,7 @@ static void tss_copy_io_bitmap(struct tss_struct *tss, struct io_bitmap *iobm)
/**
* tss_update_io_bitmap - Update I/O bitmap before exiting to usermode
*/
-void tss_update_io_bitmap(void)
+void native_tss_update_io_bitmap(void)
{
struct tss_struct *tss = this_cpu_ptr(&cpu_tss_rw);
struct thread_struct *t = ¤t->thread;
diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c
index 64229dad7eab..69309cd56fdf 100644
--- a/arch/x86/mm/dump_pagetables.c
+++ b/arch/x86/mm/dump_pagetables.c
@@ -363,13 +363,8 @@ static void ptdump_walk_pgd_level_core(struct seq_file *m,
{
const struct ptdump_range ptdump_ranges[] = {
#ifdef CONFIG_X86_64
-
-#define normalize_addr_shift (64 - (__VIRTUAL_MASK_SHIFT + 1))
-#define normalize_addr(u) ((signed long)((u) << normalize_addr_shift) >> \
- normalize_addr_shift)
-
{0, PTRS_PER_PGD * PGD_LEVEL_MULT / 2},
- {normalize_addr(PTRS_PER_PGD * PGD_LEVEL_MULT / 2), ~0UL},
+ {GUARD_HOLE_END_ADDR, ~0UL},
#else
{0, ~0UL},
#endif
diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index 79409120a603..507f4fb88fa7 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -72,6 +72,9 @@
#include <asm/mwait.h>
#include <asm/pci_x86.h>
#include <asm/cpu.h>
+#ifdef CONFIG_X86_IOPL_IOPERM
+#include <asm/io_bitmap.h>
+#endif
#ifdef CONFIG_ACPI
#include <linux/acpi.h>
@@ -837,6 +840,25 @@ static void xen_load_sp0(unsigned long sp0)
this_cpu_write(cpu_tss_rw.x86_tss.sp0, sp0);
}
+#ifdef CONFIG_X86_IOPL_IOPERM
+static void xen_update_io_bitmap(void)
+{
+ struct physdev_set_iobitmap iobitmap;
+ struct tss_struct *tss = this_cpu_ptr(&cpu_tss_rw);
+
+ native_tss_update_io_bitmap();
+
+ iobitmap.bitmap = (uint8_t *)(&tss->x86_tss) +
+ tss->x86_tss.io_bitmap_base;
+ if (tss->x86_tss.io_bitmap_base == IO_BITMAP_OFFSET_INVALID)
+ iobitmap.nr_ports = 0;
+ else
+ iobitmap.nr_ports = IO_BITMAP_BITS;
+
+ HYPERVISOR_physdev_op(PHYSDEVOP_set_iobitmap, &iobitmap);
+}
+#endif
+
static void xen_io_delay(void)
{
}
@@ -1047,6 +1069,9 @@ static const struct pv_cpu_ops xen_cpu_ops __initconst = {
.write_idt_entry = xen_write_idt_entry,
.load_sp0 = xen_load_sp0,
+#ifdef CONFIG_X86_IOPL_IOPERM
+ .update_io_bitmap = xen_update_io_bitmap,
+#endif
.io_delay = xen_io_delay,
/* Xen takes care of %gs when switching to usermode for us */
^ permalink raw reply related [flat|nested] 547+ messages in thread
* Re: [GIT PULL] x86 fixes
2020-01-31 11:52 Ingo Molnar
@ 2020-01-31 19:35 ` pr-tracker-bot
0 siblings, 0 replies; 547+ messages in thread
From: pr-tracker-bot @ 2020-01-31 19:35 UTC (permalink / raw)
To: Ingo Molnar
Cc: Linus Torvalds, linux-kernel, Thomas Gleixner, Borislav Petkov,
Peter Zijlstra, Andrew Morton
The pull request you sent on Fri, 31 Jan 2020 12:52:59 +0100:
> git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus
has been merged into torvalds/linux.git:
https://git.kernel.org/torvalds/c/b70a2d6b29f7c5b621bf83f903f26fee5fe28efc
Thank you!
--
Deet-doot-dot, I am a bot.
https://korg.wiki.kernel.org/userdoc/prtracker
^ permalink raw reply [flat|nested] 547+ messages in thread
* [GIT PULL] x86 fixes
@ 2020-01-31 11:52 Ingo Molnar
2020-01-31 19:35 ` pr-tracker-bot
0 siblings, 1 reply; 547+ messages in thread
From: Ingo Molnar @ 2020-01-31 11:52 UTC (permalink / raw)
To: Linus Torvalds
Cc: linux-kernel, Thomas Gleixner, Borislav Petkov, Peter Zijlstra,
Andrew Morton
Linus,
Please pull the latest x86-urgent-for-linus git tree from:
git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus
# HEAD: 6bd3357b6181bd38c1a757168a8842e09ec6f3fb Merge branches 'x86/hyperv', 'x86/kdump' and 'x86/misc' into x86/urgent, to pick up single-commit branches
Misc fixes:
- three fixes and a cleanup for the resctrl code,
- a HyperV fix,
- a fix to /proc/kcore contents in live debugging sessions,
- a fix for the x86 decoder opcode map.
Thanks,
Ingo
------------------>
Masami Hiramatsu (1):
x86/decoder: Add TEST opcode to Group3-2
Omar Sandoval (1):
x86/crash: Define arch_crash_save_vmcoreinfo() if CONFIG_CRASH_CORE=y
Wei Liu (1):
x86/hyper-v: Add "polling" bit to hv_synic_sint
Xiaochen Shen (4):
x86/resctrl: Fix use-after-free when deleting resource groups
x86/resctrl: Fix use-after-free due to inaccurate refcount of rdtgroup
x86/resctrl: Fix a deadlock due to inaccurate reference
x86/resctrl: Clean up unused function parameter in mkdir path
arch/x86/include/asm/hyperv-tlfs.h | 3 ++-
arch/x86/kernel/Makefile | 1 +
arch/x86/kernel/cpu/resctrl/rdtgroup.c | 48 ++++++++++++++++++----------------
arch/x86/kernel/crash_core_32.c | 17 ++++++++++++
arch/x86/kernel/crash_core_64.c | 24 +++++++++++++++++
arch/x86/kernel/machine_kexec_32.c | 12 ---------
arch/x86/kernel/machine_kexec_64.c | 19 --------------
arch/x86/lib/x86-opcode-map.txt | 2 +-
tools/arch/x86/lib/x86-opcode-map.txt | 2 +-
9 files changed, 71 insertions(+), 57 deletions(-)
create mode 100644 arch/x86/kernel/crash_core_32.c
create mode 100644 arch/x86/kernel/crash_core_64.c
^ permalink raw reply [flat|nested] 547+ messages in thread
* Re: [GIT PULL] x86 fixes
2020-01-18 18:52 Ingo Molnar
@ 2020-01-18 21:05 ` pr-tracker-bot
0 siblings, 0 replies; 547+ messages in thread
From: pr-tracker-bot @ 2020-01-18 21:05 UTC (permalink / raw)
To: Ingo Molnar
Cc: Linus Torvalds, linux-kernel, Thomas Gleixner, Borislav Petkov,
Peter Zijlstra, Andrew Morton
The pull request you sent on Sat, 18 Jan 2020 19:52:58 +0100:
> git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus
has been merged into torvalds/linux.git:
https://git.kernel.org/torvalds/c/0cc2682d8baaa6c7383e40153afc19239ad28d77
Thank you!
--
Deet-doot-dot, I am a bot.
https://korg.wiki.kernel.org/userdoc/prtracker
^ permalink raw reply [flat|nested] 547+ messages in thread
* [GIT PULL] x86 fixes
@ 2020-01-18 18:52 Ingo Molnar
2020-01-18 21:05 ` pr-tracker-bot
0 siblings, 1 reply; 547+ messages in thread
From: Ingo Molnar @ 2020-01-18 18:52 UTC (permalink / raw)
To: Linus Torvalds
Cc: linux-kernel, Thomas Gleixner, Borislav Petkov, Peter Zijlstra,
Andrew Morton
Linus,
Please pull the latest x86-urgent-for-linus git tree from:
git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus
# HEAD: a006483b2f97af685f0e60f3a547c9ad4c9b9e94 x86/CPU/AMD: Ensure clearing of SME/SEV features is maintained
Misc fixes:
- a resctrl fix for uninitialized objects found by debugobjects,
- a resctrl memory leak fix,
- fix the unintended re-enabling of the of SME and SEV CPU flags if
memory encryption was disabled at bootup via the MSR space.
Thanks,
Ingo
------------------>
Qian Cai (1):
x86/resctrl: Fix an imbalance in domain_remove_cpu()
Shakeel Butt (1):
x86/resctrl: Fix potential memory leak
Tom Lendacky (1):
x86/CPU/AMD: Ensure clearing of SME/SEV features is maintained
arch/x86/kernel/cpu/amd.c | 4 ++--
arch/x86/kernel/cpu/resctrl/core.c | 2 +-
arch/x86/kernel/cpu/resctrl/rdtgroup.c | 6 +++---
3 files changed, 6 insertions(+), 6 deletions(-)
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 90f75e515876..62c30279be77 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -615,9 +615,9 @@ static void early_detect_mem_encrypt(struct cpuinfo_x86 *c)
return;
clear_all:
- clear_cpu_cap(c, X86_FEATURE_SME);
+ setup_clear_cpu_cap(X86_FEATURE_SME);
clear_sev:
- clear_cpu_cap(c, X86_FEATURE_SEV);
+ setup_clear_cpu_cap(X86_FEATURE_SEV);
}
}
diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c
index 03eb90d00af0..89049b343c7a 100644
--- a/arch/x86/kernel/cpu/resctrl/core.c
+++ b/arch/x86/kernel/cpu/resctrl/core.c
@@ -618,7 +618,7 @@ static void domain_remove_cpu(int cpu, struct rdt_resource *r)
if (static_branch_unlikely(&rdt_mon_enable_key))
rmdir_mondata_subdir_allrdtgrp(r, d->id);
list_del(&d->list);
- if (is_mbm_enabled())
+ if (r->mon_capable && is_mbm_enabled())
cancel_delayed_work(&d->mbm_over);
if (is_llc_occupancy_enabled() && has_busy_rmid(r, d)) {
/*
diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
index 2e3b06d6bbc6..dac7209a0708 100644
--- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
+++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
@@ -1741,9 +1741,6 @@ static int set_cache_qos_cfg(int level, bool enable)
struct rdt_domain *d;
int cpu;
- if (!zalloc_cpumask_var(&cpu_mask, GFP_KERNEL))
- return -ENOMEM;
-
if (level == RDT_RESOURCE_L3)
update = l3_qos_cfg_update;
else if (level == RDT_RESOURCE_L2)
@@ -1751,6 +1748,9 @@ static int set_cache_qos_cfg(int level, bool enable)
else
return -EINVAL;
+ if (!zalloc_cpumask_var(&cpu_mask, GFP_KERNEL))
+ return -ENOMEM;
+
r_l = &rdt_resources_all[level];
list_for_each_entry(d, &r_l->domains, list) {
/* Pick one CPU from each domain instance to update MSR */
^ permalink raw reply related [flat|nested] 547+ messages in thread
* Re: [GIT PULL] x86 fixes
2019-12-01 22:22 Ingo Molnar
@ 2019-12-02 4:40 ` pr-tracker-bot
0 siblings, 0 replies; 547+ messages in thread
From: pr-tracker-bot @ 2019-12-02 4:40 UTC (permalink / raw)
To: Ingo Molnar
Cc: Linus Torvalds, linux-kernel, Borislav Petkov, Peter Zijlstra,
Thomas Gleixner, Andrew Morton, Andy Lutomirski
The pull request you sent on Sun, 1 Dec 2019 23:22:08 +0100:
> git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus
has been merged into torvalds/linux.git:
https://git.kernel.org/torvalds/c/e5b3fc125d768eacd73bb4dc5019f0ce95635af4
Thank you!
--
Deet-doot-dot, I am a bot.
https://korg.wiki.kernel.org/userdoc/prtracker
^ permalink raw reply [flat|nested] 547+ messages in thread
* [GIT PULL] x86 fixes
@ 2019-12-01 22:22 Ingo Molnar
2019-12-02 4:40 ` pr-tracker-bot
0 siblings, 1 reply; 547+ messages in thread
From: Ingo Molnar @ 2019-12-01 22:22 UTC (permalink / raw)
To: Linus Torvalds
Cc: linux-kernel, Borislav Petkov, Peter Zijlstra, Thomas Gleixner,
Andrew Morton, Andy Lutomirski
Linus,
Please pull the latest x86-urgent-for-linus git tree from:
git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus
# HEAD: 91298f1a302dad0f0f630413c812818636faa8a0 x86/mm/pat: Fix off-by-one bugs in interval tree search
Various fixes:
- Fix the PAT performance regression that downgraded write-combining
device memory regions to uncached.
- There's been a number of bugs in 32-bit double fault handling -
hopefully all fixed now.
- Fix an LDT crash
- Fix an FPU over-optimization that broke with GCC9 code optimizations.
- Misc cleanups
Thanks,
Ingo
------------------>
Andy Lutomirski (9):
selftests/x86/single_step_syscall: Check SYSENTER directly
lkdtm: Add a DOUBLE_FAULT crash type on x86
x86/traps: Disentangle the 32-bit and 64-bit doublefault code
x86/doublefault/32: Rename doublefault.c to doublefault_32.c
x86/doublefault/32: Move #DF stack and TSS to cpu_entry_area
x86/doublefault/32: Rewrite the x86_32 #DF handler and unify with 64-bit
x86/traps: die() instead of panicking on a double fault
x86/ptrace: Remove set_segment_reg() implementations for current
x86/ptrace: Document FSBASE and GSBASE ABI oddities
Borislav Petkov (2):
x86/entry/32: Remove unused 'restore_all_notrace' local label
x86/ioperm: Save an indentation level in tss_update_io_bitmap()
Ingo Molnar (1):
x86/mm/pat: Fix off-by-one bugs in interval tree search
Joerg Roedel (1):
x86/mm/32: Sync only to VMALLOC_END in vmalloc_sync_all()
Sebastian Andrzej Siewior (1):
x86/fpu: Don't cache access to fpu_fpregs_owner_ctx
arch/x86/Kconfig.debug | 2 +-
arch/x86/entry/entry_32.S | 43 ++++++-
arch/x86/include/asm/cpu_entry_area.h | 12 ++
arch/x86/include/asm/doublefault.h | 13 +++
arch/x86/include/asm/fpu/internal.h | 2 +-
arch/x86/include/asm/pgtable_32_types.h | 7 +-
arch/x86/include/asm/processor.h | 2 -
arch/x86/include/asm/traps.h | 3 +
arch/x86/kernel/Makefile | 4 +-
arch/x86/kernel/cpu/common.c | 12 +-
arch/x86/kernel/doublefault.c | 86 --------------
arch/x86/kernel/doublefault_32.c | 136 ++++++++++++++++++++++
arch/x86/kernel/dumpstack_32.c | 30 +++++
arch/x86/kernel/process.c | 52 ++++-----
arch/x86/kernel/ptrace.c | 36 ++++--
arch/x86/kernel/traps.c | 31 +++--
arch/x86/mm/cpu_entry_area.c | 14 ++-
arch/x86/mm/fault.c | 2 +-
arch/x86/mm/pat_interval.c | 12 +-
drivers/misc/lkdtm/bugs.c | 39 +++++++
drivers/misc/lkdtm/core.c | 3 +
drivers/misc/lkdtm/lkdtm.h | 3 +
tools/testing/selftests/x86/single_step_syscall.c | 94 +++++++++++++--
23 files changed, 467 insertions(+), 171 deletions(-)
create mode 100644 arch/x86/include/asm/doublefault.h
delete mode 100644 arch/x86/kernel/doublefault.c
create mode 100644 arch/x86/kernel/doublefault_32.c
^ permalink raw reply [flat|nested] 547+ messages in thread
* Re: [GIT PULL] x86 fixes
2019-11-16 21:42 Ingo Molnar
@ 2019-11-17 0:35 ` pr-tracker-bot
0 siblings, 0 replies; 547+ messages in thread
From: pr-tracker-bot @ 2019-11-17 0:35 UTC (permalink / raw)
To: Ingo Molnar
Cc: Linus Torvalds, linux-kernel, Thomas Gleixner, Borislav Petkov,
Peter Zijlstra, Andrew Morton
The pull request you sent on Sat, 16 Nov 2019 22:42:43 +0100:
> git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus
has been merged into torvalds/linux.git:
https://git.kernel.org/torvalds/c/fe30021c36fbfb71d6ff25a424342149e58bba52
Thank you!
--
Deet-doot-dot, I am a bot.
https://korg.wiki.kernel.org/userdoc/prtracker
^ permalink raw reply [flat|nested] 547+ messages in thread
* [GIT PULL] x86 fixes
@ 2019-11-16 21:42 Ingo Molnar
2019-11-17 0:35 ` pr-tracker-bot
0 siblings, 1 reply; 547+ messages in thread
From: Ingo Molnar @ 2019-11-16 21:42 UTC (permalink / raw)
To: Linus Torvalds
Cc: linux-kernel, Thomas Gleixner, Borislav Petkov, Peter Zijlstra,
Andrew Morton
Linus,
Please pull the latest x86-urgent-for-linus git tree from:
git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus
# HEAD: c8eafe1495303bfd0eedaa8156b1ee9082ee9642 x86/resctrl: Fix potential lockdep warning
Two fixes: disable unreliable HPET on Intel Coffe Lake platforms, and fix
a lockdep splat in the resctrl code.
Thanks,
Ingo
------------------>
Kai-Heng Feng (1):
x86/quirks: Disable HPET on Intel Coffe Lake platforms
Xiaochen Shen (1):
x86/resctrl: Fix potential lockdep warning
arch/x86/kernel/cpu/resctrl/rdtgroup.c | 4 ----
arch/x86/kernel/early-quirks.c | 2 ++
2 files changed, 2 insertions(+), 4 deletions(-)
diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
index a46dee8e78db..2e3b06d6bbc6 100644
--- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
+++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
@@ -461,10 +461,8 @@ static ssize_t rdtgroup_cpus_write(struct kernfs_open_file *of,
}
rdtgrp = rdtgroup_kn_lock_live(of->kn);
- rdt_last_cmd_clear();
if (!rdtgrp) {
ret = -ENOENT;
- rdt_last_cmd_puts("Directory was removed\n");
goto unlock;
}
@@ -2648,10 +2646,8 @@ static int mkdir_rdt_prepare(struct kernfs_node *parent_kn,
int ret;
prdtgrp = rdtgroup_kn_lock_live(prgrp_kn);
- rdt_last_cmd_clear();
if (!prdtgrp) {
ret = -ENODEV;
- rdt_last_cmd_puts("Directory was removed\n");
goto out_unlock;
}
diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c
index 6f6b1d04dadf..4cba91ec8049 100644
--- a/arch/x86/kernel/early-quirks.c
+++ b/arch/x86/kernel/early-quirks.c
@@ -710,6 +710,8 @@ static struct chipset early_qrk[] __initdata = {
*/
{ PCI_VENDOR_ID_INTEL, 0x0f00,
PCI_CLASS_BRIDGE_HOST, PCI_ANY_ID, 0, force_disable_hpet},
+ { PCI_VENDOR_ID_INTEL, 0x3ec4,
+ PCI_CLASS_BRIDGE_HOST, PCI_ANY_ID, 0, force_disable_hpet},
{ PCI_VENDOR_ID_BROADCOM, 0x4331,
PCI_CLASS_NETWORK_OTHER, PCI_ANY_ID, 0, apple_airport_reset},
{}
^ permalink raw reply related [flat|nested] 547+ messages in thread
* Re: [GIT PULL] x86 fixes
2019-10-12 13:19 Ingo Molnar
@ 2019-10-12 22:35 ` pr-tracker-bot
0 siblings, 0 replies; 547+ messages in thread
From: pr-tracker-bot @ 2019-10-12 22:35 UTC (permalink / raw)
To: Ingo Molnar
Cc: Linus Torvalds, linux-kernel, Thomas Gleixner, Borislav Petkov,
Peter Zijlstra, Andrew Morton
The pull request you sent on Sat, 12 Oct 2019 15:19:16 +0200:
> git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus
has been merged into torvalds/linux.git:
https://git.kernel.org/torvalds/c/7a275fd7b9519b5cc63270a8964055aadb04de26
Thank you!
--
Deet-doot-dot, I am a bot.
https://korg.wiki.kernel.org/userdoc/prtracker
^ permalink raw reply [flat|nested] 547+ messages in thread
* [GIT PULL] x86 fixes
@ 2019-10-12 13:19 Ingo Molnar
2019-10-12 22:35 ` pr-tracker-bot
0 siblings, 1 reply; 547+ messages in thread
From: Ingo Molnar @ 2019-10-12 13:19 UTC (permalink / raw)
To: Linus Torvalds
Cc: linux-kernel, Thomas Gleixner, Borislav Petkov, Peter Zijlstra,
Andrew Morton
Linus,
Please pull the latest x86-urgent-for-linus git tree from:
git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus
# HEAD: 8d7c6ac3b2371eb1cbc9925a88f4d10efff374de x86/cpu: Add Comet Lake to the Intel CPU models header
A handful of fixes: a kexec linking fix, an AMD MWAITX fix, a vmware
guest support fix when built under Clang, and new CPU model number
definitions.
out-of-topic modifications in x86-urgent-for-linus:
-----------------------------------------------------
include/linux/string.h # bec500777089: lib/string: Make memzero_exp
lib/string.c # bec500777089: lib/string: Make memzero_exp
Thanks,
Ingo
------------------>
Arvind Sankar (1):
lib/string: Make memzero_explicit() inline instead of external
Janakarajan Natarajan (1):
x86/asm: Fix MWAITX C-state hint value
Kan Liang (1):
x86/cpu: Add Comet Lake to the Intel CPU models header
Sami Tolvanen (1):
x86/cpu/vmware: Use the full form of INL in VMWARE_PORT
arch/x86/include/asm/intel-family.h | 3 +++
arch/x86/include/asm/mwait.h | 2 +-
arch/x86/kernel/cpu/vmware.c | 2 +-
arch/x86/lib/delay.c | 4 ++--
include/linux/string.h | 21 ++++++++++++++++++++-
lib/string.c | 21 ---------------------
6 files changed, 27 insertions(+), 26 deletions(-)
diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h
index f04622500da3..c606c0b70738 100644
--- a/arch/x86/include/asm/intel-family.h
+++ b/arch/x86/include/asm/intel-family.h
@@ -83,6 +83,9 @@
#define INTEL_FAM6_TIGERLAKE_L 0x8C
#define INTEL_FAM6_TIGERLAKE 0x8D
+#define INTEL_FAM6_COMETLAKE 0xA5
+#define INTEL_FAM6_COMETLAKE_L 0xA6
+
/* "Small Core" Processors (Atom) */
#define INTEL_FAM6_ATOM_BONNELL 0x1C /* Diamondville, Pineview */
diff --git a/arch/x86/include/asm/mwait.h b/arch/x86/include/asm/mwait.h
index e28f8b723b5c..9d5252c9685c 100644
--- a/arch/x86/include/asm/mwait.h
+++ b/arch/x86/include/asm/mwait.h
@@ -21,7 +21,7 @@
#define MWAIT_ECX_INTERRUPT_BREAK 0x1
#define MWAITX_ECX_TIMER_ENABLE BIT(1)
#define MWAITX_MAX_LOOPS ((u32)-1)
-#define MWAITX_DISABLE_CSTATES 0xf
+#define MWAITX_DISABLE_CSTATES 0xf0
static inline void __monitor(const void *eax, unsigned long ecx,
unsigned long edx)
diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c
index 9735139cfdf8..46d732696c1c 100644
--- a/arch/x86/kernel/cpu/vmware.c
+++ b/arch/x86/kernel/cpu/vmware.c
@@ -49,7 +49,7 @@
#define VMWARE_CMD_VCPU_RESERVED 31
#define VMWARE_PORT(cmd, eax, ebx, ecx, edx) \
- __asm__("inl (%%dx)" : \
+ __asm__("inl (%%dx), %%eax" : \
"=a"(eax), "=c"(ecx), "=d"(edx), "=b"(ebx) : \
"a"(VMWARE_HYPERVISOR_MAGIC), \
"c"(VMWARE_CMD_##cmd), \
diff --git a/arch/x86/lib/delay.c b/arch/x86/lib/delay.c
index b7375dc6898f..c126571e5e2e 100644
--- a/arch/x86/lib/delay.c
+++ b/arch/x86/lib/delay.c
@@ -113,8 +113,8 @@ static void delay_mwaitx(unsigned long __loops)
__monitorx(raw_cpu_ptr(&cpu_tss_rw), 0, 0);
/*
- * AMD, like Intel, supports the EAX hint and EAX=0xf
- * means, do not enter any deep C-state and we use it
+ * AMD, like Intel's MWAIT version, supports the EAX hint and
+ * EAX=0xf0 means, do not enter any deep C-state and we use it
* here in delay() to minimize wakeup latency.
*/
__mwaitx(MWAITX_DISABLE_CSTATES, delay, MWAITX_ECX_TIMER_ENABLE);
diff --git a/include/linux/string.h b/include/linux/string.h
index b2f9df7f0761..b6ccdc2c7f02 100644
--- a/include/linux/string.h
+++ b/include/linux/string.h
@@ -227,7 +227,26 @@ static inline bool strstarts(const char *str, const char *prefix)
}
size_t memweight(const void *ptr, size_t bytes);
-void memzero_explicit(void *s, size_t count);
+
+/**
+ * memzero_explicit - Fill a region of memory (e.g. sensitive
+ * keying data) with 0s.
+ * @s: Pointer to the start of the area.
+ * @count: The size of the area.
+ *
+ * Note: usually using memset() is just fine (!), but in cases
+ * where clearing out _local_ data at the end of a scope is
+ * necessary, memzero_explicit() should be used instead in
+ * order to prevent the compiler from optimising away zeroing.
+ *
+ * memzero_explicit() doesn't need an arch-specific version as
+ * it just invokes the one of memset() implicitly.
+ */
+static inline void memzero_explicit(void *s, size_t count)
+{
+ memset(s, 0, count);
+ barrier_data(s);
+}
/**
* kbasename - return the last part of a pathname.
diff --git a/lib/string.c b/lib/string.c
index cd7a10c19210..08ec58cc673b 100644
--- a/lib/string.c
+++ b/lib/string.c
@@ -748,27 +748,6 @@ void *memset(void *s, int c, size_t count)
EXPORT_SYMBOL(memset);
#endif
-/**
- * memzero_explicit - Fill a region of memory (e.g. sensitive
- * keying data) with 0s.
- * @s: Pointer to the start of the area.
- * @count: The size of the area.
- *
- * Note: usually using memset() is just fine (!), but in cases
- * where clearing out _local_ data at the end of a scope is
- * necessary, memzero_explicit() should be used instead in
- * order to prevent the compiler from optimising away zeroing.
- *
- * memzero_explicit() doesn't need an arch-specific version as
- * it just invokes the one of memset() implicitly.
- */
-void memzero_explicit(void *s, size_t count)
-{
- memset(s, 0, count);
- barrier_data(s);
-}
-EXPORT_SYMBOL(memzero_explicit);
-
#ifndef __HAVE_ARCH_MEMSET16
/**
* memset16() - Fill a memory area with a uint16_t
^ permalink raw reply related [flat|nested] 547+ messages in thread
* Re: [GIT PULL] x86 fixes
2019-09-12 12:57 Ingo Molnar
@ 2019-09-12 14:05 ` pr-tracker-bot
0 siblings, 0 replies; 547+ messages in thread
From: pr-tracker-bot @ 2019-09-12 14:05 UTC (permalink / raw)
To: Ingo Molnar
Cc: Linus Torvalds, linux-kernel, Thomas Gleixner, Borislav Petkov,
Peter Zijlstra, Andrew Morton
The pull request you sent on Thu, 12 Sep 2019 14:57:10 +0200:
> git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus
has been merged into torvalds/linux.git:
https://git.kernel.org/torvalds/c/95217783b7f6f331e7a6675e0a31fb9a5a1b9a36
Thank you!
--
Deet-doot-dot, I am a bot.
https://korg.wiki.kernel.org/userdoc/prtracker
^ permalink raw reply [flat|nested] 547+ messages in thread
* [GIT PULL] x86 fixes
@ 2019-09-12 12:57 Ingo Molnar
2019-09-12 14:05 ` pr-tracker-bot
0 siblings, 1 reply; 547+ messages in thread
From: Ingo Molnar @ 2019-09-12 12:57 UTC (permalink / raw)
To: Linus Torvalds
Cc: linux-kernel, Thomas Gleixner, Borislav Petkov, Peter Zijlstra,
Andrew Morton
Linus,
Please pull the latest x86-urgent-for-linus git tree from:
git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus
# HEAD: afa8b475c1aec185a8e106c48b3832e0b88bc2de x86/timer: Force PIT initialization when !X86_FEATURE_ARAT
A KVM guest fix, and a kdump kernel relocation errors fix.
Thanks,
Ingo
------------------>
Jan Stancek (1):
x86/timer: Force PIT initialization when !X86_FEATURE_ARAT
Steve Wahl (1):
x86/purgatory: Change compiler flags from -mcmodel=kernel to -mcmodel=large to fix kexec relocation errors
arch/x86/kernel/apic/apic.c | 4 ++++
arch/x86/purgatory/Makefile | 35 +++++++++++++++++++----------------
2 files changed, 23 insertions(+), 16 deletions(-)
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index dba2828b779a..f91b3ff9dc03 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -834,6 +834,10 @@ bool __init apic_needs_pit(void)
if (!boot_cpu_has(X86_FEATURE_APIC))
return true;
+ /* Virt guests may lack ARAT, but still have DEADLINE */
+ if (!boot_cpu_has(X86_FEATURE_ARAT))
+ return true;
+
/* Deadline timer is based on TSC so no further PIT action required */
if (boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER))
return false;
diff --git a/arch/x86/purgatory/Makefile b/arch/x86/purgatory/Makefile
index 8901a1f89cf5..10fb42da0007 100644
--- a/arch/x86/purgatory/Makefile
+++ b/arch/x86/purgatory/Makefile
@@ -18,37 +18,40 @@ targets += purgatory.ro
KASAN_SANITIZE := n
KCOV_INSTRUMENT := n
+# These are adjustments to the compiler flags used for objects that
+# make up the standalone purgatory.ro
+
+PURGATORY_CFLAGS_REMOVE := -mcmodel=kernel
+PURGATORY_CFLAGS := -mcmodel=large -ffreestanding -fno-zero-initialized-in-bss
+
# Default KBUILD_CFLAGS can have -pg option set when FTRACE is enabled. That
# in turn leaves some undefined symbols like __fentry__ in purgatory and not
# sure how to relocate those.
ifdef CONFIG_FUNCTION_TRACER
-CFLAGS_REMOVE_sha256.o += $(CC_FLAGS_FTRACE)
-CFLAGS_REMOVE_purgatory.o += $(CC_FLAGS_FTRACE)
-CFLAGS_REMOVE_string.o += $(CC_FLAGS_FTRACE)
-CFLAGS_REMOVE_kexec-purgatory.o += $(CC_FLAGS_FTRACE)
+PURGATORY_CFLAGS_REMOVE += $(CC_FLAGS_FTRACE)
endif
ifdef CONFIG_STACKPROTECTOR
-CFLAGS_REMOVE_sha256.o += -fstack-protector
-CFLAGS_REMOVE_purgatory.o += -fstack-protector
-CFLAGS_REMOVE_string.o += -fstack-protector
-CFLAGS_REMOVE_kexec-purgatory.o += -fstack-protector
+PURGATORY_CFLAGS_REMOVE += -fstack-protector
endif
ifdef CONFIG_STACKPROTECTOR_STRONG
-CFLAGS_REMOVE_sha256.o += -fstack-protector-strong
-CFLAGS_REMOVE_purgatory.o += -fstack-protector-strong
-CFLAGS_REMOVE_string.o += -fstack-protector-strong
-CFLAGS_REMOVE_kexec-purgatory.o += -fstack-protector-strong
+PURGATORY_CFLAGS_REMOVE += -fstack-protector-strong
endif
ifdef CONFIG_RETPOLINE
-CFLAGS_REMOVE_sha256.o += $(RETPOLINE_CFLAGS)
-CFLAGS_REMOVE_purgatory.o += $(RETPOLINE_CFLAGS)
-CFLAGS_REMOVE_string.o += $(RETPOLINE_CFLAGS)
-CFLAGS_REMOVE_kexec-purgatory.o += $(RETPOLINE_CFLAGS)
+PURGATORY_CFLAGS_REMOVE += $(RETPOLINE_CFLAGS)
endif
+CFLAGS_REMOVE_purgatory.o += $(PURGATORY_CFLAGS_REMOVE)
+CFLAGS_purgatory.o += $(PURGATORY_CFLAGS)
+
+CFLAGS_REMOVE_sha256.o += $(PURGATORY_CFLAGS_REMOVE)
+CFLAGS_sha256.o += $(PURGATORY_CFLAGS)
+
+CFLAGS_REMOVE_string.o += $(PURGATORY_CFLAGS_REMOVE)
+CFLAGS_string.o += $(PURGATORY_CFLAGS)
+
$(obj)/purgatory.ro: $(PURGATORY_OBJS) FORCE
$(call if_changed,ld)
^ permalink raw reply related [flat|nested] 547+ messages in thread
* Re: [GIT PULL] x86 fixes
2019-09-05 8:07 Ingo Molnar
@ 2019-09-05 21:15 ` pr-tracker-bot
0 siblings, 0 replies; 547+ messages in thread
From: pr-tracker-bot @ 2019-09-05 21:15 UTC (permalink / raw)
To: Ingo Molnar
Cc: Linus Torvalds, linux-kernel, Thomas Gleixner, Borislav Petkov,
Peter Zijlstra, Andrew Morton
The pull request you sent on Thu, 5 Sep 2019 10:07:19 +0200:
> git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus
has been merged into torvalds/linux.git:
https://git.kernel.org/torvalds/c/19e4147a04a43d210dbacda76e0988f90bb0ba45
Thank you!
--
Deet-doot-dot, I am a bot.
https://korg.wiki.kernel.org/userdoc/prtracker
^ permalink raw reply [flat|nested] 547+ messages in thread
* [GIT PULL] x86 fixes
@ 2019-09-05 8:07 Ingo Molnar
2019-09-05 21:15 ` pr-tracker-bot
0 siblings, 1 reply; 547+ messages in thread
From: Ingo Molnar @ 2019-09-05 8:07 UTC (permalink / raw)
To: Linus Torvalds
Cc: linux-kernel, Thomas Gleixner, Borislav Petkov, Peter Zijlstra,
Andrew Morton
Linus,
Please pull the latest x86-urgent-for-linus git tree from:
git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus
# HEAD: 4030b4c585c41eeefec7bd20ce3d0e100a0f2e4d x86/hyper-v: Fix overflow bug in fill_gva_list()
Misc fixes:
- a signed kernels EFI boot fix,
- an UBSAN related AC flags fix,
- and a Hyper-V infinite loop fix.
Thanks,
Ingo
------------------>
John S. Gruber (1):
x86/boot: Preserve boot_params.secure_boot from sanitizing
Peter Zijlstra (1):
x86/uaccess: Don't leak the AC flags into __get_user() argument evaluation
Tianyu Lan (1):
x86/hyper-v: Fix overflow bug in fill_gva_list()
arch/x86/hyperv/mmu.c | 8 +++++---
arch/x86/include/asm/bootparam_utils.h | 1 +
arch/x86/include/asm/uaccess.h | 4 +++-
3 files changed, 9 insertions(+), 4 deletions(-)
diff --git a/arch/x86/hyperv/mmu.c b/arch/x86/hyperv/mmu.c
index e65d7fe6489f..5208ba49c89a 100644
--- a/arch/x86/hyperv/mmu.c
+++ b/arch/x86/hyperv/mmu.c
@@ -37,12 +37,14 @@ static inline int fill_gva_list(u64 gva_list[], int offset,
* Lower 12 bits encode the number of additional
* pages to flush (in addition to the 'cur' page).
*/
- if (diff >= HV_TLB_FLUSH_UNIT)
+ if (diff >= HV_TLB_FLUSH_UNIT) {
gva_list[gva_n] |= ~PAGE_MASK;
- else if (diff)
+ cur += HV_TLB_FLUSH_UNIT;
+ } else if (diff) {
gva_list[gva_n] |= (diff - 1) >> PAGE_SHIFT;
+ cur = end;
+ }
- cur += HV_TLB_FLUSH_UNIT;
gva_n++;
} while (cur < end);
diff --git a/arch/x86/include/asm/bootparam_utils.h b/arch/x86/include/asm/bootparam_utils.h
index 9e5f3c722c33..981fe923a59f 100644
--- a/arch/x86/include/asm/bootparam_utils.h
+++ b/arch/x86/include/asm/bootparam_utils.h
@@ -70,6 +70,7 @@ static void sanitize_boot_params(struct boot_params *boot_params)
BOOT_PARAM_PRESERVE(eddbuf_entries),
BOOT_PARAM_PRESERVE(edd_mbr_sig_buf_entries),
BOOT_PARAM_PRESERVE(edd_mbr_sig_buffer),
+ BOOT_PARAM_PRESERVE(secure_boot),
BOOT_PARAM_PRESERVE(hdr),
BOOT_PARAM_PRESERVE(e820_table),
BOOT_PARAM_PRESERVE(eddbuf),
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index 9c4435307ff8..35c225ede0e4 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -444,8 +444,10 @@ __pu_label: \
({ \
int __gu_err; \
__inttype(*(ptr)) __gu_val; \
+ __typeof__(ptr) __gu_ptr = (ptr); \
+ __typeof__(size) __gu_size = (size); \
__uaccess_begin_nospec(); \
- __get_user_size(__gu_val, (ptr), (size), __gu_err, -EFAULT); \
+ __get_user_size(__gu_val, __gu_ptr, __gu_size, __gu_err, -EFAULT); \
__uaccess_end(); \
(x) = (__force __typeof__(*(ptr)))__gu_val; \
__builtin_expect(__gu_err, 0); \
^ permalink raw reply related [flat|nested] 547+ messages in thread
* Re: [GIT PULL] x86 fixes
2019-06-29 9:14 Ingo Molnar
@ 2019-06-29 11:45 ` pr-tracker-bot
0 siblings, 0 replies; 547+ messages in thread
From: pr-tracker-bot @ 2019-06-29 11:45 UTC (permalink / raw)
To: Ingo Molnar
Cc: Linus Torvalds, linux-kernel, Thomas Gleixner, Borislav Petkov,
Peter Zijlstra, Andrew Morton
The pull request you sent on Sat, 29 Jun 2019 11:14:07 +0200:
> git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus
has been merged into torvalds/linux.git:
https://git.kernel.org/torvalds/c/728254541ebcc7fee869c3c4c3f36f96be791edb
Thank you!
--
Deet-doot-dot, I am a bot.
https://korg.wiki.kernel.org/userdoc/prtracker
^ permalink raw reply [flat|nested] 547+ messages in thread
* [GIT PULL] x86 fixes
@ 2019-06-29 9:14 Ingo Molnar
2019-06-29 11:45 ` pr-tracker-bot
0 siblings, 1 reply; 547+ messages in thread
From: Ingo Molnar @ 2019-06-29 9:14 UTC (permalink / raw)
To: Linus Torvalds
Cc: linux-kernel, Thomas Gleixner, Borislav Petkov, Peter Zijlstra,
Andrew Morton
Linus,
Please pull the latest x86-urgent-for-linus git tree from:
git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus
# HEAD: ae6a45a0868986f69039a2150d3b2b9ca294c378 x86/unwind/orc: Fall back to using frame pointers for generated code
Misc fixes all over the place:
- might_sleep() atomicity fix in the microcode loader
- resctrl boundary condition fix
- APIC arithmethics bug fix for frequencies >= 4.2 GHz
- three 5-level paging crash fixes
- two speculation fixes
- a perf/stacktrace fix
Thanks,
Ingo
------------------>
Alejandro Jimenez (1):
x86/speculation: Allow guests to use SSBD even if host does not
Colin Ian King (1):
x86/apic: Fix integer overflow on 10 bit left shift of cpu_khz
Josh Poimboeuf (1):
x86/unwind/orc: Fall back to using frame pointers for generated code
Kirill A. Shutemov (3):
x86/boot/64: Fix crash if kernel image crosses page table boundary
x86/boot/64: Add missing fixup_pointer() for next_early_pgt access
x86/mm: Handle physical-virtual alignment mismatch in phys_p4d_init()
Reinette Chatre (1):
x86/resctrl: Prevent possible overrun during bitmap operations
Song Liu (1):
perf/x86: Always store regs->ip in perf_callchain_kernel()
Thomas Gleixner (1):
x86/microcode: Fix the microcode load on CPU hotplug for real
arch/x86/events/core.c | 10 +++++-----
arch/x86/kernel/apic/apic.c | 3 ++-
arch/x86/kernel/cpu/bugs.c | 11 ++++++++++-
arch/x86/kernel/cpu/microcode/core.c | 15 ++++++++++-----
arch/x86/kernel/cpu/resctrl/rdtgroup.c | 35 ++++++++++++++++------------------
arch/x86/kernel/head64.c | 20 ++++++++++---------
arch/x86/kernel/unwind_orc.c | 26 +++++++++++++++++++++----
arch/x86/mm/init_64.c | 24 ++++++++++++-----------
8 files changed, 89 insertions(+), 55 deletions(-)
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index f315425d8468..4fb3ca1e699d 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -2402,13 +2402,13 @@ perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *re
return;
}
- if (perf_hw_regs(regs)) {
- if (perf_callchain_store(entry, regs->ip))
- return;
+ if (perf_callchain_store(entry, regs->ip))
+ return;
+
+ if (perf_hw_regs(regs))
unwind_start(&state, current, regs, NULL);
- } else {
+ else
unwind_start(&state, current, NULL, (void *)regs->sp);
- }
for (; !unwind_done(&state); unwind_next_frame(&state)) {
addr = unwind_get_return_address(&state);
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 177aa8ef2afa..85be316665b4 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -1464,7 +1464,8 @@ static void apic_pending_intr_clear(void)
if (queued) {
if (boot_cpu_has(X86_FEATURE_TSC) && cpu_khz) {
ntsc = rdtsc();
- max_loops = (cpu_khz << 10) - (ntsc - tsc);
+ max_loops = (long long)cpu_khz << 10;
+ max_loops -= ntsc - tsc;
} else {
max_loops--;
}
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 03b4cc0ec3a7..66ca906aa790 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -835,6 +835,16 @@ static enum ssb_mitigation __init __ssb_select_mitigation(void)
break;
}
+ /*
+ * If SSBD is controlled by the SPEC_CTRL MSR, then set the proper
+ * bit in the mask to allow guests to use the mitigation even in the
+ * case where the host does not enable it.
+ */
+ if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD) ||
+ static_cpu_has(X86_FEATURE_AMD_SSBD)) {
+ x86_spec_ctrl_mask |= SPEC_CTRL_SSBD;
+ }
+
/*
* We have three CPU feature flags that are in play here:
* - X86_BUG_SPEC_STORE_BYPASS - CPU is susceptible.
@@ -852,7 +862,6 @@ static enum ssb_mitigation __init __ssb_select_mitigation(void)
x86_amd_ssb_disable();
} else {
x86_spec_ctrl_base |= SPEC_CTRL_SSBD;
- x86_spec_ctrl_mask |= SPEC_CTRL_SSBD;
wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base);
}
}
diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c
index a813987b5552..cb0fdcaf1415 100644
--- a/arch/x86/kernel/cpu/microcode/core.c
+++ b/arch/x86/kernel/cpu/microcode/core.c
@@ -789,13 +789,16 @@ static struct syscore_ops mc_syscore_ops = {
.resume = mc_bp_resume,
};
-static int mc_cpu_online(unsigned int cpu)
+static int mc_cpu_starting(unsigned int cpu)
{
- struct device *dev;
-
- dev = get_cpu_device(cpu);
microcode_update_cpu(cpu);
pr_debug("CPU%d added\n", cpu);
+ return 0;
+}
+
+static int mc_cpu_online(unsigned int cpu)
+{
+ struct device *dev = get_cpu_device(cpu);
if (sysfs_create_group(&dev->kobj, &mc_attr_group))
pr_err("Failed to create group for CPU%d\n", cpu);
@@ -872,7 +875,9 @@ int __init microcode_init(void)
goto out_ucode_group;
register_syscore_ops(&mc_syscore_ops);
- cpuhp_setup_state_nocalls(CPUHP_AP_MICROCODE_LOADER, "x86/microcode:online",
+ cpuhp_setup_state_nocalls(CPUHP_AP_MICROCODE_LOADER, "x86/microcode:starting",
+ mc_cpu_starting, NULL);
+ cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "x86/microcode:online",
mc_cpu_online, mc_cpu_down_prep);
pr_info("Microcode Update Driver: v%s.", DRIVER_VERSION);
diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
index 869cbef5da81..f9d8ed6ab03b 100644
--- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
+++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
@@ -804,8 +804,12 @@ static int rdt_bit_usage_show(struct kernfs_open_file *of,
struct seq_file *seq, void *v)
{
struct rdt_resource *r = of->kn->parent->priv;
- u32 sw_shareable = 0, hw_shareable = 0;
- u32 exclusive = 0, pseudo_locked = 0;
+ /*
+ * Use unsigned long even though only 32 bits are used to ensure
+ * test_bit() is used safely.
+ */
+ unsigned long sw_shareable = 0, hw_shareable = 0;
+ unsigned long exclusive = 0, pseudo_locked = 0;
struct rdt_domain *dom;
int i, hwb, swb, excl, psl;
enum rdtgrp_mode mode;
@@ -850,10 +854,10 @@ static int rdt_bit_usage_show(struct kernfs_open_file *of,
}
for (i = r->cache.cbm_len - 1; i >= 0; i--) {
pseudo_locked = dom->plr ? dom->plr->cbm : 0;
- hwb = test_bit(i, (unsigned long *)&hw_shareable);
- swb = test_bit(i, (unsigned long *)&sw_shareable);
- excl = test_bit(i, (unsigned long *)&exclusive);
- psl = test_bit(i, (unsigned long *)&pseudo_locked);
+ hwb = test_bit(i, &hw_shareable);
+ swb = test_bit(i, &sw_shareable);
+ excl = test_bit(i, &exclusive);
+ psl = test_bit(i, &pseudo_locked);
if (hwb && swb)
seq_putc(seq, 'X');
else if (hwb && !swb)
@@ -2494,26 +2498,19 @@ static int mkdir_mondata_all(struct kernfs_node *parent_kn,
*/
static void cbm_ensure_valid(u32 *_val, struct rdt_resource *r)
{
- /*
- * Convert the u32 _val to an unsigned long required by all the bit
- * operations within this function. No more than 32 bits of this
- * converted value can be accessed because all bit operations are
- * additionally provided with cbm_len that is initialized during
- * hardware enumeration using five bits from the EAX register and
- * thus never can exceed 32 bits.
- */
- unsigned long *val = (unsigned long *)_val;
+ unsigned long val = *_val;
unsigned int cbm_len = r->cache.cbm_len;
unsigned long first_bit, zero_bit;
- if (*val == 0)
+ if (val == 0)
return;
- first_bit = find_first_bit(val, cbm_len);
- zero_bit = find_next_zero_bit(val, cbm_len, first_bit);
+ first_bit = find_first_bit(&val, cbm_len);
+ zero_bit = find_next_zero_bit(&val, cbm_len, first_bit);
/* Clear any remaining bits to ensure contiguous region */
- bitmap_clear(val, zero_bit, cbm_len - zero_bit);
+ bitmap_clear(&val, zero_bit, cbm_len - zero_bit);
+ *_val = (u32)val;
}
/*
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 16b1cbd3a61e..29ffa495bd1c 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -184,24 +184,25 @@ unsigned long __head __startup_64(unsigned long physaddr,
pgtable_flags = _KERNPG_TABLE_NOENC + sme_get_me_mask();
if (la57) {
- p4d = fixup_pointer(early_dynamic_pgts[next_early_pgt++], physaddr);
+ p4d = fixup_pointer(early_dynamic_pgts[(*next_pgt_ptr)++],
+ physaddr);
i = (physaddr >> PGDIR_SHIFT) % PTRS_PER_PGD;
pgd[i + 0] = (pgdval_t)p4d + pgtable_flags;
pgd[i + 1] = (pgdval_t)p4d + pgtable_flags;
- i = (physaddr >> P4D_SHIFT) % PTRS_PER_P4D;
- p4d[i + 0] = (pgdval_t)pud + pgtable_flags;
- p4d[i + 1] = (pgdval_t)pud + pgtable_flags;
+ i = physaddr >> P4D_SHIFT;
+ p4d[(i + 0) % PTRS_PER_P4D] = (pgdval_t)pud + pgtable_flags;
+ p4d[(i + 1) % PTRS_PER_P4D] = (pgdval_t)pud + pgtable_flags;
} else {
i = (physaddr >> PGDIR_SHIFT) % PTRS_PER_PGD;
pgd[i + 0] = (pgdval_t)pud + pgtable_flags;
pgd[i + 1] = (pgdval_t)pud + pgtable_flags;
}
- i = (physaddr >> PUD_SHIFT) % PTRS_PER_PUD;
- pud[i + 0] = (pudval_t)pmd + pgtable_flags;
- pud[i + 1] = (pudval_t)pmd + pgtable_flags;
+ i = physaddr >> PUD_SHIFT;
+ pud[(i + 0) % PTRS_PER_PUD] = (pudval_t)pmd + pgtable_flags;
+ pud[(i + 1) % PTRS_PER_PUD] = (pudval_t)pmd + pgtable_flags;
pmd_entry = __PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL;
/* Filter out unsupported __PAGE_KERNEL_* bits: */
@@ -211,8 +212,9 @@ unsigned long __head __startup_64(unsigned long physaddr,
pmd_entry += physaddr;
for (i = 0; i < DIV_ROUND_UP(_end - _text, PMD_SIZE); i++) {
- int idx = i + (physaddr >> PMD_SHIFT) % PTRS_PER_PMD;
- pmd[idx] = pmd_entry + i * PMD_SIZE;
+ int idx = i + (physaddr >> PMD_SHIFT);
+
+ pmd[idx % PTRS_PER_PMD] = pmd_entry + i * PMD_SIZE;
}
/*
diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c
index 33b66b5c5aec..72b997eaa1fc 100644
--- a/arch/x86/kernel/unwind_orc.c
+++ b/arch/x86/kernel/unwind_orc.c
@@ -82,9 +82,9 @@ static struct orc_entry *orc_find(unsigned long ip);
* But they are copies of the ftrace entries that are static and
* defined in ftrace_*.S, which do have orc entries.
*
- * If the undwinder comes across a ftrace trampoline, then find the
+ * If the unwinder comes across a ftrace trampoline, then find the
* ftrace function that was used to create it, and use that ftrace
- * function's orc entrie, as the placement of the return code in
+ * function's orc entry, as the placement of the return code in
* the stack will be identical.
*/
static struct orc_entry *orc_ftrace_find(unsigned long ip)
@@ -128,6 +128,16 @@ static struct orc_entry null_orc_entry = {
.type = ORC_TYPE_CALL
};
+/* Fake frame pointer entry -- used as a fallback for generated code */
+static struct orc_entry orc_fp_entry = {
+ .type = ORC_TYPE_CALL,
+ .sp_reg = ORC_REG_BP,
+ .sp_offset = 16,
+ .bp_reg = ORC_REG_PREV_SP,
+ .bp_offset = -16,
+ .end = 0,
+};
+
static struct orc_entry *orc_find(unsigned long ip)
{
static struct orc_entry *orc;
@@ -392,8 +402,16 @@ bool unwind_next_frame(struct unwind_state *state)
* calls and calls to noreturn functions.
*/
orc = orc_find(state->signal ? state->ip : state->ip - 1);
- if (!orc)
- goto err;
+ if (!orc) {
+ /*
+ * As a fallback, try to assume this code uses a frame pointer.
+ * This is useful for generated code, like BPF, which ORC
+ * doesn't know about. This is just a guess, so the rest of
+ * the unwind is no longer considered reliable.
+ */
+ orc = &orc_fp_entry;
+ state->error = true;
+ }
/* End-of-stack check for kernel threads: */
if (orc->sp_reg == ORC_REG_UNDEFINED) {
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 693aaf28d5fe..0f01c7b1d217 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -671,23 +671,25 @@ static unsigned long __meminit
phys_p4d_init(p4d_t *p4d_page, unsigned long paddr, unsigned long paddr_end,
unsigned long page_size_mask, bool init)
{
- unsigned long paddr_next, paddr_last = paddr_end;
- unsigned long vaddr = (unsigned long)__va(paddr);
- int i = p4d_index(vaddr);
+ unsigned long vaddr, vaddr_end, vaddr_next, paddr_next, paddr_last;
+
+ paddr_last = paddr_end;
+ vaddr = (unsigned long)__va(paddr);
+ vaddr_end = (unsigned long)__va(paddr_end);
if (!pgtable_l5_enabled())
return phys_pud_init((pud_t *) p4d_page, paddr, paddr_end,
page_size_mask, init);
- for (; i < PTRS_PER_P4D; i++, paddr = paddr_next) {
- p4d_t *p4d;
+ for (; vaddr < vaddr_end; vaddr = vaddr_next) {
+ p4d_t *p4d = p4d_page + p4d_index(vaddr);
pud_t *pud;
- vaddr = (unsigned long)__va(paddr);
- p4d = p4d_page + p4d_index(vaddr);
- paddr_next = (paddr & P4D_MASK) + P4D_SIZE;
+ vaddr_next = (vaddr & P4D_MASK) + P4D_SIZE;
+ paddr = __pa(vaddr);
if (paddr >= paddr_end) {
+ paddr_next = __pa(vaddr_next);
if (!after_bootmem &&
!e820__mapped_any(paddr & P4D_MASK, paddr_next,
E820_TYPE_RAM) &&
@@ -699,13 +701,13 @@ phys_p4d_init(p4d_t *p4d_page, unsigned long paddr, unsigned long paddr_end,
if (!p4d_none(*p4d)) {
pud = pud_offset(p4d, 0);
- paddr_last = phys_pud_init(pud, paddr, paddr_end,
- page_size_mask, init);
+ paddr_last = phys_pud_init(pud, paddr, __pa(vaddr_end),
+ page_size_mask, init);
continue;
}
pud = alloc_low_page();
- paddr_last = phys_pud_init(pud, paddr, paddr_end,
+ paddr_last = phys_pud_init(pud, paddr, __pa(vaddr_end),
page_size_mask, init);
spin_lock(&init_mm.page_table_lock);
^ permalink raw reply related [flat|nested] 547+ messages in thread
* Re: [GIT PULL] x86 fixes
2019-06-02 17:44 Ingo Molnar
@ 2019-06-02 18:15 ` pr-tracker-bot
0 siblings, 0 replies; 547+ messages in thread
From: pr-tracker-bot @ 2019-06-02 18:15 UTC (permalink / raw)
To: Ingo Molnar
Cc: Linus Torvalds, linux-kernel, Thomas Gleixner, Borislav Petkov,
Peter Zijlstra, Andrew Morton
The pull request you sent on Sun, 2 Jun 2019 19:44:42 +0200:
> git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus
has been merged into torvalds/linux.git:
https://git.kernel.org/torvalds/c/7bd1d5edd0160b615ab8748cf94dabcab1fb01cb
Thank you!
--
Deet-doot-dot, I am a bot.
https://korg.wiki.kernel.org/userdoc/prtracker
^ permalink raw reply [flat|nested] 547+ messages in thread
* [GIT PULL] x86 fixes
@ 2019-06-02 17:44 Ingo Molnar
2019-06-02 18:15 ` pr-tracker-bot
0 siblings, 1 reply; 547+ messages in thread
From: Ingo Molnar @ 2019-06-02 17:44 UTC (permalink / raw)
To: Linus Torvalds
Cc: linux-kernel, Thomas Gleixner, Borislav Petkov, Peter Zijlstra,
Andrew Morton
Linus,
Please pull the latest x86-urgent-for-linus git tree from:
git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus
# HEAD: 2ac44ab608705948564791ce1d15d43ba81a1e38 x86/CPU/AMD: Don't force the CPB cap when running under a hypervisor
Two fixes: a quirk for KVM guests running on certain AMD CPUs, and a
KASAN related build fix.
Thanks,
Ingo
------------------>
Ard Biesheuvel (1):
x86/boot: Provide KASAN compatible aliases for string routines
Frank van der Linden (1):
x86/CPU/AMD: Don't force the CPB cap when running under a hypervisor
arch/x86/boot/compressed/string.c | 14 ++++++++++----
arch/x86/kernel/cpu/amd.c | 7 +++++--
2 files changed, 15 insertions(+), 6 deletions(-)
diff --git a/arch/x86/boot/compressed/string.c b/arch/x86/boot/compressed/string.c
index 19dbbcdd1a53..81fc1eaa3229 100644
--- a/arch/x86/boot/compressed/string.c
+++ b/arch/x86/boot/compressed/string.c
@@ -11,7 +11,7 @@
#include "../string.c"
#ifdef CONFIG_X86_32
-static void *__memcpy(void *dest, const void *src, size_t n)
+static void *____memcpy(void *dest, const void *src, size_t n)
{
int d0, d1, d2;
asm volatile(
@@ -25,7 +25,7 @@ static void *__memcpy(void *dest, const void *src, size_t n)
return dest;
}
#else
-static void *__memcpy(void *dest, const void *src, size_t n)
+static void *____memcpy(void *dest, const void *src, size_t n)
{
long d0, d1, d2;
asm volatile(
@@ -56,7 +56,7 @@ void *memmove(void *dest, const void *src, size_t n)
const unsigned char *s = src;
if (d <= s || d - s >= n)
- return __memcpy(dest, src, n);
+ return ____memcpy(dest, src, n);
while (n-- > 0)
d[n] = s[n];
@@ -71,5 +71,11 @@ void *memcpy(void *dest, const void *src, size_t n)
warn("Avoiding potentially unsafe overlapping memcpy()!");
return memmove(dest, src, n);
}
- return __memcpy(dest, src, n);
+ return ____memcpy(dest, src, n);
}
+
+#ifdef CONFIG_KASAN
+extern void *__memset(void *s, int c, size_t n) __alias(memset);
+extern void *__memmove(void *dest, const void *src, size_t n) __alias(memmove);
+extern void *__memcpy(void *dest, const void *src, size_t n) __alias(memcpy);
+#endif
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 80a405c2048a..8d4e50428b68 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -824,8 +824,11 @@ static void init_amd_zn(struct cpuinfo_x86 *c)
{
set_cpu_cap(c, X86_FEATURE_ZEN);
- /* Fix erratum 1076: CPB feature bit not being set in CPUID. */
- if (!cpu_has(c, X86_FEATURE_CPB))
+ /*
+ * Fix erratum 1076: CPB feature bit not being set in CPUID.
+ * Always set it, except when running under a hypervisor.
+ */
+ if (!cpu_has(c, X86_FEATURE_HYPERVISOR) && !cpu_has(c, X86_FEATURE_CPB))
set_cpu_cap(c, X86_FEATURE_CPB);
}
^ permalink raw reply related [flat|nested] 547+ messages in thread
* Re: [GIT PULL] x86 fixes
2019-05-16 16:26 Ingo Molnar
@ 2019-05-16 18:20 ` pr-tracker-bot
0 siblings, 0 replies; 547+ messages in thread
From: pr-tracker-bot @ 2019-05-16 18:20 UTC (permalink / raw)
To: Ingo Molnar
Cc: Linus Torvalds, linux-kernel, Thomas Gleixner, Borislav Petkov,
Peter Zijlstra, Andrew Morton
The pull request you sent on Thu, 16 May 2019 18:26:16 +0200:
> git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus
has been merged into torvalds/linux.git:
https://git.kernel.org/torvalds/c/d396360acdf7e57edcd9e2d080343b0353d65d63
Thank you!
--
Deet-doot-dot, I am a bot.
https://korg.wiki.kernel.org/userdoc/prtracker
^ permalink raw reply [flat|nested] 547+ messages in thread
* [GIT PULL] x86 fixes
@ 2019-05-16 16:26 Ingo Molnar
2019-05-16 18:20 ` pr-tracker-bot
0 siblings, 1 reply; 547+ messages in thread
From: Ingo Molnar @ 2019-05-16 16:26 UTC (permalink / raw)
To: Linus Torvalds
Cc: linux-kernel, Thomas Gleixner, Borislav Petkov, Peter Zijlstra,
Andrew Morton
Linus,
Please pull the latest x86-urgent-for-linus git tree from:
git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus
# HEAD: 9d8d0294e78a164d407133dea05caf4b84247d6a x86/speculation/mds: Improve CPU buffer clear documentation
Misc fixes and updates:
- a handful of MDS documentation/comment updates
- a cleanup related to hweight interfaces
- a SEV guest fix for large pages
- a kprobes LTO fix
- and a final cleanup commit for vDSO HPET support removal.
out-of-topic modifications in x86-urgent-for-linus:
-----------------------------------------------------
lib/hweight.c # 409ca45526a4: x86/kconfig: Disable CONFIG_
Thanks,
Ingo
------------------>
Andi Kleen (1):
x86/kprobes: Make trampoline_handler() global and visible
Andy Lutomirski (2):
x86/speculation/mds: Revert CPU buffer clear on double fault exit
x86/speculation/mds: Improve CPU buffer clear documentation
Brijesh Singh (1):
x86/mm: Do not use set_{pud, pmd}_safe() when splitting a large page
Jia Zhang (1):
x86/vdso: Remove hpet_page from vDSO
Masahiro Yamada (1):
x86/kconfig: Disable CONFIG_GENERIC_HWEIGHT and remove __HAVE_ARCH_SW_HWEIGHT
Documentation/x86/mds.rst | 44 ++---------
arch/x86/Kconfig | 3 -
arch/x86/entry/vdso/vdso2c.c | 3 -
arch/x86/include/asm/arch_hweight.h | 2 -
arch/x86/include/asm/vdso.h | 1 -
arch/x86/kernel/kprobes/core.c | 2 +-
arch/x86/kernel/traps.c | 8 --
arch/x86/mm/init_64.c | 144 ++++++++++++++++++++++++++----------
arch/x86/mm/mem_encrypt.c | 10 ++-
arch/x86/mm/mm_internal.h | 3 +
lib/hweight.c | 4 -
11 files changed, 121 insertions(+), 103 deletions(-)
diff --git a/Documentation/x86/mds.rst b/Documentation/x86/mds.rst
index 534e9baa4e1d..5d4330be200f 100644
--- a/Documentation/x86/mds.rst
+++ b/Documentation/x86/mds.rst
@@ -142,45 +142,13 @@ Mitigation points
mds_user_clear.
The mitigation is invoked in prepare_exit_to_usermode() which covers
- most of the kernel to user space transitions. There are a few exceptions
- which are not invoking prepare_exit_to_usermode() on return to user
- space. These exceptions use the paranoid exit code.
+ all but one of the kernel to user space transitions. The exception
+ is when we return from a Non Maskable Interrupt (NMI), which is
+ handled directly in do_nmi().
- - Non Maskable Interrupt (NMI):
-
- Access to sensible data like keys, credentials in the NMI context is
- mostly theoretical: The CPU can do prefetching or execute a
- misspeculated code path and thereby fetching data which might end up
- leaking through a buffer.
-
- But for mounting other attacks the kernel stack address of the task is
- already valuable information. So in full mitigation mode, the NMI is
- mitigated on the return from do_nmi() to provide almost complete
- coverage.
-
- - Double fault (#DF):
-
- A double fault is usually fatal, but the ESPFIX workaround, which can
- be triggered from user space through modify_ldt(2) is a recoverable
- double fault. #DF uses the paranoid exit path, so explicit mitigation
- in the double fault handler is required.
-
- - Machine Check Exception (#MC):
-
- Another corner case is a #MC which hits between the CPU buffer clear
- invocation and the actual return to user. As this still is in kernel
- space it takes the paranoid exit path which does not clear the CPU
- buffers. So the #MC handler repopulates the buffers to some
- extent. Machine checks are not reliably controllable and the window is
- extremly small so mitigation would just tick a checkbox that this
- theoretical corner case is covered. To keep the amount of special
- cases small, ignore #MC.
-
- - Debug Exception (#DB):
-
- This takes the paranoid exit path only when the INT1 breakpoint is in
- kernel space. #DB on a user space address takes the regular exit path,
- so no extra mitigation required.
+ (The reason that NMI is special is that prepare_exit_to_usermode() can
+ enable IRQs. In NMI context, NMIs are blocked, and we don't want to
+ enable IRQs with NMIs blocked.)
2. C-State transition
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 326b2d5bab9d..6bc9dd6e7534 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -259,9 +259,6 @@ config GENERIC_BUG
config GENERIC_BUG_RELATIVE_POINTERS
bool
-config GENERIC_HWEIGHT
- def_bool y
-
config ARCH_MAY_HAVE_PC_FDC
def_bool y
depends on ISA_DMA_API
diff --git a/arch/x86/entry/vdso/vdso2c.c b/arch/x86/entry/vdso/vdso2c.c
index 8e470b018512..3a4d8d4d39f8 100644
--- a/arch/x86/entry/vdso/vdso2c.c
+++ b/arch/x86/entry/vdso/vdso2c.c
@@ -73,14 +73,12 @@ const char *outfilename;
enum {
sym_vvar_start,
sym_vvar_page,
- sym_hpet_page,
sym_pvclock_page,
sym_hvclock_page,
};
const int special_pages[] = {
sym_vvar_page,
- sym_hpet_page,
sym_pvclock_page,
sym_hvclock_page,
};
@@ -93,7 +91,6 @@ struct vdso_sym {
struct vdso_sym required_syms[] = {
[sym_vvar_start] = {"vvar_start", true},
[sym_vvar_page] = {"vvar_page", true},
- [sym_hpet_page] = {"hpet_page", true},
[sym_pvclock_page] = {"pvclock_page", true},
[sym_hvclock_page] = {"hvclock_page", true},
{"VDSO32_NOTE_MASK", true},
diff --git a/arch/x86/include/asm/arch_hweight.h b/arch/x86/include/asm/arch_hweight.h
index fc0693569f7a..ba88edd0d58b 100644
--- a/arch/x86/include/asm/arch_hweight.h
+++ b/arch/x86/include/asm/arch_hweight.h
@@ -12,8 +12,6 @@
#define REG_OUT "a"
#endif
-#define __HAVE_ARCH_SW_HWEIGHT
-
static __always_inline unsigned int __arch_hweight32(unsigned int w)
{
unsigned int res;
diff --git a/arch/x86/include/asm/vdso.h b/arch/x86/include/asm/vdso.h
index 27566e57e87d..230474e2ddb5 100644
--- a/arch/x86/include/asm/vdso.h
+++ b/arch/x86/include/asm/vdso.h
@@ -19,7 +19,6 @@ struct vdso_image {
long sym_vvar_start; /* Negative offset to the vvar area */
long sym_vvar_page;
- long sym_hpet_page;
long sym_pvclock_page;
long sym_hvclock_page;
long sym_VDSO32_NOTE_MASK;
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
index cf52ee0d8711..9e4fa2484d10 100644
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -768,7 +768,7 @@ static struct kprobe kretprobe_kprobe = {
/*
* Called from kretprobe_trampoline
*/
-static __used void *trampoline_handler(struct pt_regs *regs)
+__used __visible void *trampoline_handler(struct pt_regs *regs)
{
struct kprobe_ctlblk *kcb;
struct kretprobe_instance *ri = NULL;
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 7de466eb960b..8b6d03e55d2f 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -58,7 +58,6 @@
#include <asm/alternative.h>
#include <asm/fpu/xstate.h>
#include <asm/trace/mpx.h>
-#include <asm/nospec-branch.h>
#include <asm/mpx.h>
#include <asm/vm86.h>
#include <asm/umip.h>
@@ -368,13 +367,6 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
regs->ip = (unsigned long)general_protection;
regs->sp = (unsigned long)&gpregs->orig_ax;
- /*
- * This situation can be triggered by userspace via
- * modify_ldt(2) and the return does not take the regular
- * user space exit, so a CPU buffer clear is required when
- * MDS mitigation is enabled.
- */
- mds_user_clear_cpu_buffers();
return;
}
#endif
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 20d14254b686..62fc457f3849 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -58,6 +58,37 @@
#include "ident_map.c"
+#define DEFINE_POPULATE(fname, type1, type2, init) \
+static inline void fname##_init(struct mm_struct *mm, \
+ type1##_t *arg1, type2##_t *arg2, bool init) \
+{ \
+ if (init) \
+ fname##_safe(mm, arg1, arg2); \
+ else \
+ fname(mm, arg1, arg2); \
+}
+
+DEFINE_POPULATE(p4d_populate, p4d, pud, init)
+DEFINE_POPULATE(pgd_populate, pgd, p4d, init)
+DEFINE_POPULATE(pud_populate, pud, pmd, init)
+DEFINE_POPULATE(pmd_populate_kernel, pmd, pte, init)
+
+#define DEFINE_ENTRY(type1, type2, init) \
+static inline void set_##type1##_init(type1##_t *arg1, \
+ type2##_t arg2, bool init) \
+{ \
+ if (init) \
+ set_##type1##_safe(arg1, arg2); \
+ else \
+ set_##type1(arg1, arg2); \
+}
+
+DEFINE_ENTRY(p4d, p4d, init)
+DEFINE_ENTRY(pud, pud, init)
+DEFINE_ENTRY(pmd, pmd, init)
+DEFINE_ENTRY(pte, pte, init)
+
+
/*
* NOTE: pagetable_init alloc all the fixmap pagetables contiguous on the
* physical space so we can cache the place of the first one and move
@@ -414,7 +445,7 @@ void __init cleanup_highmap(void)
*/
static unsigned long __meminit
phys_pte_init(pte_t *pte_page, unsigned long paddr, unsigned long paddr_end,
- pgprot_t prot)
+ pgprot_t prot, bool init)
{
unsigned long pages = 0, paddr_next;
unsigned long paddr_last = paddr_end;
@@ -432,7 +463,7 @@ phys_pte_init(pte_t *pte_page, unsigned long paddr, unsigned long paddr_end,
E820_TYPE_RAM) &&
!e820__mapped_any(paddr & PAGE_MASK, paddr_next,
E820_TYPE_RESERVED_KERN))
- set_pte_safe(pte, __pte(0));
+ set_pte_init(pte, __pte(0), init);
continue;
}
@@ -452,7 +483,7 @@ phys_pte_init(pte_t *pte_page, unsigned long paddr, unsigned long paddr_end,
pr_info(" pte=%p addr=%lx pte=%016lx\n", pte, paddr,
pfn_pte(paddr >> PAGE_SHIFT, PAGE_KERNEL).pte);
pages++;
- set_pte_safe(pte, pfn_pte(paddr >> PAGE_SHIFT, prot));
+ set_pte_init(pte, pfn_pte(paddr >> PAGE_SHIFT, prot), init);
paddr_last = (paddr & PAGE_MASK) + PAGE_SIZE;
}
@@ -468,7 +499,7 @@ phys_pte_init(pte_t *pte_page, unsigned long paddr, unsigned long paddr_end,
*/
static unsigned long __meminit
phys_pmd_init(pmd_t *pmd_page, unsigned long paddr, unsigned long paddr_end,
- unsigned long page_size_mask, pgprot_t prot)
+ unsigned long page_size_mask, pgprot_t prot, bool init)
{
unsigned long pages = 0, paddr_next;
unsigned long paddr_last = paddr_end;
@@ -487,7 +518,7 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long paddr, unsigned long paddr_end,
E820_TYPE_RAM) &&
!e820__mapped_any(paddr & PMD_MASK, paddr_next,
E820_TYPE_RESERVED_KERN))
- set_pmd_safe(pmd, __pmd(0));
+ set_pmd_init(pmd, __pmd(0), init);
continue;
}
@@ -496,7 +527,8 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long paddr, unsigned long paddr_end,
spin_lock(&init_mm.page_table_lock);
pte = (pte_t *)pmd_page_vaddr(*pmd);
paddr_last = phys_pte_init(pte, paddr,
- paddr_end, prot);
+ paddr_end, prot,
+ init);
spin_unlock(&init_mm.page_table_lock);
continue;
}
@@ -524,19 +556,20 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long paddr, unsigned long paddr_end,
if (page_size_mask & (1<<PG_LEVEL_2M)) {
pages++;
spin_lock(&init_mm.page_table_lock);
- set_pte_safe((pte_t *)pmd,
- pfn_pte((paddr & PMD_MASK) >> PAGE_SHIFT,
- __pgprot(pgprot_val(prot) | _PAGE_PSE)));
+ set_pte_init((pte_t *)pmd,
+ pfn_pte((paddr & PMD_MASK) >> PAGE_SHIFT,
+ __pgprot(pgprot_val(prot) | _PAGE_PSE)),
+ init);
spin_unlock(&init_mm.page_table_lock);
paddr_last = paddr_next;
continue;
}
pte = alloc_low_page();
- paddr_last = phys_pte_init(pte, paddr, paddr_end, new_prot);
+ paddr_last = phys_pte_init(pte, paddr, paddr_end, new_prot, init);
spin_lock(&init_mm.page_table_lock);
- pmd_populate_kernel_safe(&init_mm, pmd, pte);
+ pmd_populate_kernel_init(&init_mm, pmd, pte, init);
spin_unlock(&init_mm.page_table_lock);
}
update_page_count(PG_LEVEL_2M, pages);
@@ -551,7 +584,7 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long paddr, unsigned long paddr_end,
*/
static unsigned long __meminit
phys_pud_init(pud_t *pud_page, unsigned long paddr, unsigned long paddr_end,
- unsigned long page_size_mask)
+ unsigned long page_size_mask, bool init)
{
unsigned long pages = 0, paddr_next;
unsigned long paddr_last = paddr_end;
@@ -573,7 +606,7 @@ phys_pud_init(pud_t *pud_page, unsigned long paddr, unsigned long paddr_end,
E820_TYPE_RAM) &&
!e820__mapped_any(paddr & PUD_MASK, paddr_next,
E820_TYPE_RESERVED_KERN))
- set_pud_safe(pud, __pud(0));
+ set_pud_init(pud, __pud(0), init);
continue;
}
@@ -583,7 +616,7 @@ phys_pud_init(pud_t *pud_page, unsigned long paddr, unsigned long paddr_end,
paddr_last = phys_pmd_init(pmd, paddr,
paddr_end,
page_size_mask,
- prot);
+ prot, init);
continue;
}
/*
@@ -610,9 +643,10 @@ phys_pud_init(pud_t *pud_page, unsigned long paddr, unsigned long paddr_end,
if (page_size_mask & (1<<PG_LEVEL_1G)) {
pages++;
spin_lock(&init_mm.page_table_lock);
- set_pte_safe((pte_t *)pud,
- pfn_pte((paddr & PUD_MASK) >> PAGE_SHIFT,
- PAGE_KERNEL_LARGE));
+ set_pte_init((pte_t *)pud,
+ pfn_pte((paddr & PUD_MASK) >> PAGE_SHIFT,
+ PAGE_KERNEL_LARGE),
+ init);
spin_unlock(&init_mm.page_table_lock);
paddr_last = paddr_next;
continue;
@@ -620,10 +654,10 @@ phys_pud_init(pud_t *pud_page, unsigned long paddr, unsigned long paddr_end,
pmd = alloc_low_page();
paddr_last = phys_pmd_init(pmd, paddr, paddr_end,
- page_size_mask, prot);
+ page_size_mask, prot, init);
spin_lock(&init_mm.page_table_lock);
- pud_populate_safe(&init_mm, pud, pmd);
+ pud_populate_init(&init_mm, pud, pmd, init);
spin_unlock(&init_mm.page_table_lock);
}
@@ -634,14 +668,15 @@ phys_pud_init(pud_t *pud_page, unsigned long paddr, unsigned long paddr_end,
static unsigned long __meminit
phys_p4d_init(p4d_t *p4d_page, unsigned long paddr, unsigned long paddr_end,
- unsigned long page_size_mask)
+ unsigned long page_size_mask, bool init)
{
unsigned long paddr_next, paddr_last = paddr_end;
unsigned long vaddr = (unsigned long)__va(paddr);
int i = p4d_index(vaddr);
if (!pgtable_l5_enabled())
- return phys_pud_init((pud_t *) p4d_page, paddr, paddr_end, page_size_mask);
+ return phys_pud_init((pud_t *) p4d_page, paddr, paddr_end,
+ page_size_mask, init);
for (; i < PTRS_PER_P4D; i++, paddr = paddr_next) {
p4d_t *p4d;
@@ -657,39 +692,34 @@ phys_p4d_init(p4d_t *p4d_page, unsigned long paddr, unsigned long paddr_end,
E820_TYPE_RAM) &&
!e820__mapped_any(paddr & P4D_MASK, paddr_next,
E820_TYPE_RESERVED_KERN))
- set_p4d_safe(p4d, __p4d(0));
+ set_p4d_init(p4d, __p4d(0), init);
continue;
}
if (!p4d_none(*p4d)) {
pud = pud_offset(p4d, 0);
- paddr_last = phys_pud_init(pud, paddr,
- paddr_end,
- page_size_mask);
+ paddr_last = phys_pud_init(pud, paddr, paddr_end,
+ page_size_mask, init);
continue;
}
pud = alloc_low_page();
paddr_last = phys_pud_init(pud, paddr, paddr_end,
- page_size_mask);
+ page_size_mask, init);
spin_lock(&init_mm.page_table_lock);
- p4d_populate_safe(&init_mm, p4d, pud);
+ p4d_populate_init(&init_mm, p4d, pud, init);
spin_unlock(&init_mm.page_table_lock);
}
return paddr_last;
}
-/*
- * Create page table mapping for the physical memory for specific physical
- * addresses. The virtual and physical addresses have to be aligned on PMD level
- * down. It returns the last physical address mapped.
- */
-unsigned long __meminit
-kernel_physical_mapping_init(unsigned long paddr_start,
- unsigned long paddr_end,
- unsigned long page_size_mask)
+static unsigned long __meminit
+__kernel_physical_mapping_init(unsigned long paddr_start,
+ unsigned long paddr_end,
+ unsigned long page_size_mask,
+ bool init)
{
bool pgd_changed = false;
unsigned long vaddr, vaddr_start, vaddr_end, vaddr_next, paddr_last;
@@ -709,19 +739,22 @@ kernel_physical_mapping_init(unsigned long paddr_start,
p4d = (p4d_t *)pgd_page_vaddr(*pgd);
paddr_last = phys_p4d_init(p4d, __pa(vaddr),
__pa(vaddr_end),
- page_size_mask);
+ page_size_mask,
+ init);
continue;
}
p4d = alloc_low_page();
paddr_last = phys_p4d_init(p4d, __pa(vaddr), __pa(vaddr_end),
- page_size_mask);
+ page_size_mask, init);
spin_lock(&init_mm.page_table_lock);
if (pgtable_l5_enabled())
- pgd_populate_safe(&init_mm, pgd, p4d);
+ pgd_populate_init(&init_mm, pgd, p4d, init);
else
- p4d_populate_safe(&init_mm, p4d_offset(pgd, vaddr), (pud_t *) p4d);
+ p4d_populate_init(&init_mm, p4d_offset(pgd, vaddr),
+ (pud_t *) p4d, init);
+
spin_unlock(&init_mm.page_table_lock);
pgd_changed = true;
}
@@ -732,6 +765,37 @@ kernel_physical_mapping_init(unsigned long paddr_start,
return paddr_last;
}
+
+/*
+ * Create page table mapping for the physical memory for specific physical
+ * addresses. Note that it can only be used to populate non-present entries.
+ * The virtual and physical addresses have to be aligned on PMD level
+ * down. It returns the last physical address mapped.
+ */
+unsigned long __meminit
+kernel_physical_mapping_init(unsigned long paddr_start,
+ unsigned long paddr_end,
+ unsigned long page_size_mask)
+{
+ return __kernel_physical_mapping_init(paddr_start, paddr_end,
+ page_size_mask, true);
+}
+
+/*
+ * This function is similar to kernel_physical_mapping_init() above with the
+ * exception that it uses set_{pud,pmd}() instead of the set_{pud,pte}_safe()
+ * when updating the mapping. The caller is responsible to flush the TLBs after
+ * the function returns.
+ */
+unsigned long __meminit
+kernel_physical_mapping_change(unsigned long paddr_start,
+ unsigned long paddr_end,
+ unsigned long page_size_mask)
+{
+ return __kernel_physical_mapping_init(paddr_start, paddr_end,
+ page_size_mask, false);
+}
+
#ifndef CONFIG_NUMA
void __init initmem_init(void)
{
diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c
index 385afa2b9e17..51f50a7a07ef 100644
--- a/arch/x86/mm/mem_encrypt.c
+++ b/arch/x86/mm/mem_encrypt.c
@@ -301,9 +301,13 @@ static int __init early_set_memory_enc_dec(unsigned long vaddr,
else
split_page_size_mask = 1 << PG_LEVEL_2M;
- kernel_physical_mapping_init(__pa(vaddr & pmask),
- __pa((vaddr_end & pmask) + psize),
- split_page_size_mask);
+ /*
+ * kernel_physical_mapping_change() does not flush the TLBs, so
+ * a TLB flush is required after we exit from the for loop.
+ */
+ kernel_physical_mapping_change(__pa(vaddr & pmask),
+ __pa((vaddr_end & pmask) + psize),
+ split_page_size_mask);
}
ret = 0;
diff --git a/arch/x86/mm/mm_internal.h b/arch/x86/mm/mm_internal.h
index 319bde386d5f..eeae142062ed 100644
--- a/arch/x86/mm/mm_internal.h
+++ b/arch/x86/mm/mm_internal.h
@@ -13,6 +13,9 @@ void early_ioremap_page_table_range_init(void);
unsigned long kernel_physical_mapping_init(unsigned long start,
unsigned long end,
unsigned long page_size_mask);
+unsigned long kernel_physical_mapping_change(unsigned long start,
+ unsigned long end,
+ unsigned long page_size_mask);
void zone_sizes_init(void);
extern int after_bootmem;
diff --git a/lib/hweight.c b/lib/hweight.c
index 7660d88fd496..c94586b62551 100644
--- a/lib/hweight.c
+++ b/lib/hweight.c
@@ -10,7 +10,6 @@
* The Hamming Weight of a number is the total number of bits set in it.
*/
-#ifndef __HAVE_ARCH_SW_HWEIGHT
unsigned int __sw_hweight32(unsigned int w)
{
#ifdef CONFIG_ARCH_HAS_FAST_MULTIPLIER
@@ -27,7 +26,6 @@ unsigned int __sw_hweight32(unsigned int w)
#endif
}
EXPORT_SYMBOL(__sw_hweight32);
-#endif
unsigned int __sw_hweight16(unsigned int w)
{
@@ -46,7 +44,6 @@ unsigned int __sw_hweight8(unsigned int w)
}
EXPORT_SYMBOL(__sw_hweight8);
-#ifndef __HAVE_ARCH_SW_HWEIGHT
unsigned long __sw_hweight64(__u64 w)
{
#if BITS_PER_LONG == 32
@@ -69,4 +66,3 @@ unsigned long __sw_hweight64(__u64 w)
#endif
}
EXPORT_SYMBOL(__sw_hweight64);
-#endif
^ permalink raw reply related [flat|nested] 547+ messages in thread
* Re: [GIT PULL] x86 fixes
2019-04-27 14:42 Ingo Molnar
@ 2019-04-27 18:45 ` pr-tracker-bot
0 siblings, 0 replies; 547+ messages in thread
From: pr-tracker-bot @ 2019-04-27 18:45 UTC (permalink / raw)
To: Ingo Molnar
Cc: Linus Torvalds, linux-kernel, Thomas Gleixner, Borislav Petkov,
Peter Zijlstra, Andrew Morton
The pull request you sent on Sat, 27 Apr 2019 16:42:56 +0200:
> git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus
has been merged into torvalds/linux.git:
https://git.kernel.org/torvalds/c/037904a22bf8b2c999a6e2a8ba971b549c1e9600
Thank you!
--
Deet-doot-dot, I am a bot.
https://korg.wiki.kernel.org/userdoc/prtracker
^ permalink raw reply [flat|nested] 547+ messages in thread
* [GIT PULL] x86 fixes
@ 2019-04-27 14:42 Ingo Molnar
2019-04-27 18:45 ` pr-tracker-bot
0 siblings, 1 reply; 547+ messages in thread
From: Ingo Molnar @ 2019-04-27 14:42 UTC (permalink / raw)
To: Linus Torvalds
Cc: linux-kernel, Thomas Gleixner, Borislav Petkov, Peter Zijlstra,
Andrew Morton
Linus,
Please pull the latest x86-urgent-for-linus git tree from:
git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus
# HEAD: 0d02113b31b2017dd349ec9df2314e798a90fa6e x86/mm: Fix a crash with kmemleak_scan()
Two fixes:
- Fix an early boot crash in the RSDP parsing code by effectively
turning off the parsing call - we ran out of time but want to fix the
regression. The more involved fix is being worked on.
- Fix a crash that can trigger in the kmemlek code.
Thanks,
Ingo
------------------>
Borislav Petkov (1):
x86/boot: Disable RSDP parsing temporarily
Qian Cai (1):
x86/mm: Fix a crash with kmemleak_scan()
arch/x86/boot/compressed/misc.c | 2 +-
arch/x86/mm/init.c | 6 ++++++
2 files changed, 7 insertions(+), 1 deletion(-)
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index c0d6c560df69..5a237e8dbf8d 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -352,7 +352,7 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap,
boot_params->hdr.loadflags &= ~KASLR_FLAG;
/* Save RSDP address for later use. */
- boot_params->acpi_rsdp_addr = get_rsdp_addr();
+ /* boot_params->acpi_rsdp_addr = get_rsdp_addr(); */
sanitize_boot_params(boot_params);
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index f905a2371080..8dacdb96899e 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -5,6 +5,7 @@
#include <linux/memblock.h>
#include <linux/swapfile.h>
#include <linux/swapops.h>
+#include <linux/kmemleak.h>
#include <asm/set_memory.h>
#include <asm/e820/api.h>
@@ -766,6 +767,11 @@ void free_init_pages(const char *what, unsigned long begin, unsigned long end)
if (debug_pagealloc_enabled()) {
pr_info("debug: unmapping init [mem %#010lx-%#010lx]\n",
begin, end - 1);
+ /*
+ * Inform kmemleak about the hole in the memory since the
+ * corresponding pages will be unmapped.
+ */
+ kmemleak_free_part((void *)begin, end - begin);
set_memory_np(begin, (end - begin) >> PAGE_SHIFT);
} else {
/*
^ permalink raw reply related [flat|nested] 547+ messages in thread
* Re: [GIT PULL] x86 fixes
2019-04-20 7:38 Ingo Molnar
@ 2019-04-20 19:25 ` pr-tracker-bot
0 siblings, 0 replies; 547+ messages in thread
From: pr-tracker-bot @ 2019-04-20 19:25 UTC (permalink / raw)
To: Ingo Molnar
Cc: Linus Torvalds, linux-kernel, Thomas Gleixner, Borislav Petkov,
Peter Zijlstra, Andrew Morton
The pull request you sent on Sat, 20 Apr 2019 09:38:09 +0200:
> git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus
has been merged into torvalds/linux.git:
https://git.kernel.org/torvalds/c/1fd91d719eb1ae83ef500eb4148d11db9db39a41
Thank you!
--
Deet-doot-dot, I am a bot.
https://korg.wiki.kernel.org/userdoc/prtracker
^ permalink raw reply [flat|nested] 547+ messages in thread
* [GIT PULL] x86 fixes
@ 2019-04-20 7:38 Ingo Molnar
2019-04-20 19:25 ` pr-tracker-bot
0 siblings, 1 reply; 547+ messages in thread
From: Ingo Molnar @ 2019-04-20 7:38 UTC (permalink / raw)
To: Linus Torvalds
Cc: linux-kernel, Thomas Gleixner, Borislav Petkov, Peter Zijlstra,
Andrew Morton
Linus,
Please pull the latest x86-urgent-for-linus git tree from:
git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus
# HEAD: 2ee27796f298b710992a677a7e4d35c8c588b17e x86/cpu/intel: Lower the "ENERGY_PERF_BIAS: Set to normal" message's log priority
Misc fixes all over the place: a console spam fix, section attributes
fixes, a KASLR fix, a TLB stack-variable alignment fix, a reboot quirk,
boot options related warnings fix, an LTO fix, a deadlock fix and an RDT
fix.
Thanks,
Ingo
------------------>
Andi Kleen (1):
x86/cpu/bugs: Use __initconst for 'const' init data
Baoquan He (1):
x86/mm/KASLR: Fix the size of the direct mapping section
Colin Ian King (1):
x86/Kconfig: Fix spelling mistake "effectivness" -> "effectiveness"
Hans de Goede (1):
x86/cpu/intel: Lower the "ENERGY_PERF_BIAS: Set to normal" message's log priority
Jian-Hong Pan (1):
x86/reboot, efi: Use EFI reboot for Acer TravelMate X514-51T
Peter Zijlstra (1):
x86/mm/tlb: Revert "x86/mm: Align TLB invalidation info"
Sami Tolvanen (1):
x86/build/lto: Fix truncated .bss with -fdata-sections
Thomas Gleixner (2):
x86/speculation: Prevent deadlock on ssb_state::lock
x86/mm: Prevent bogus warnings with "noexec=off"
Xiaochen Shen (1):
x86/resctrl: Do not repeat rdtgroup mode initialization
arch/x86/Kconfig | 2 +-
arch/x86/kernel/cpu/bugs.c | 6 +++---
arch/x86/kernel/cpu/intel.c | 4 ++--
arch/x86/kernel/cpu/resctrl/rdtgroup.c | 3 ++-
arch/x86/kernel/process.c | 8 ++++++--
arch/x86/kernel/reboot.c | 21 +++++++++++++++++++++
arch/x86/kernel/vmlinux.lds.S | 2 +-
arch/x86/mm/dump_pagetables.c | 3 ++-
arch/x86/mm/ioremap.c | 2 +-
arch/x86/mm/kaslr.c | 2 +-
arch/x86/mm/tlb.c | 2 +-
include/linux/efi.h | 7 ++++++-
12 files changed, 47 insertions(+), 15 deletions(-)
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index c1f9b3cf437c..01dbb05bc498 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1499,7 +1499,7 @@ config X86_CPA_STATISTICS
depends on DEBUG_FS
---help---
Expose statistics about the Change Page Attribute mechanims, which
- helps to determine the effectivness of preserving large and huge
+ helps to determine the effectiveness of preserving large and huge
page mappings when mapping protections are changed.
config ARCH_HAS_MEM_ENCRYPT
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 2da82eff0eb4..b91b3bfa5cfb 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -275,7 +275,7 @@ static const struct {
const char *option;
enum spectre_v2_user_cmd cmd;
bool secure;
-} v2_user_options[] __initdata = {
+} v2_user_options[] __initconst = {
{ "auto", SPECTRE_V2_USER_CMD_AUTO, false },
{ "off", SPECTRE_V2_USER_CMD_NONE, false },
{ "on", SPECTRE_V2_USER_CMD_FORCE, true },
@@ -419,7 +419,7 @@ static const struct {
const char *option;
enum spectre_v2_mitigation_cmd cmd;
bool secure;
-} mitigation_options[] __initdata = {
+} mitigation_options[] __initconst = {
{ "off", SPECTRE_V2_CMD_NONE, false },
{ "on", SPECTRE_V2_CMD_FORCE, true },
{ "retpoline", SPECTRE_V2_CMD_RETPOLINE, false },
@@ -658,7 +658,7 @@ static const char * const ssb_strings[] = {
static const struct {
const char *option;
enum ssb_mitigation_cmd cmd;
-} ssb_mitigation_options[] __initdata = {
+} ssb_mitigation_options[] __initconst = {
{ "auto", SPEC_STORE_BYPASS_CMD_AUTO }, /* Platform decides */
{ "on", SPEC_STORE_BYPASS_CMD_ON }, /* Disable Speculative Store Bypass */
{ "off", SPEC_STORE_BYPASS_CMD_NONE }, /* Don't touch Speculative Store Bypass */
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index fc3c07fe7df5..3142fd7a9b32 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -611,8 +611,8 @@ static void init_intel_energy_perf(struct cpuinfo_x86 *c)
if ((epb & 0xF) != ENERGY_PERF_BIAS_PERFORMANCE)
return;
- pr_warn_once("ENERGY_PERF_BIAS: Set to 'normal', was 'performance'\n");
- pr_warn_once("ENERGY_PERF_BIAS: View and update with x86_energy_perf_policy(8)\n");
+ pr_info_once("ENERGY_PERF_BIAS: Set to 'normal', was 'performance'\n");
+ pr_info_once("ENERGY_PERF_BIAS: View and update with x86_energy_perf_policy(8)\n");
epb = (epb & ~0xF) | ENERGY_PERF_BIAS_NORMAL;
wrmsrl(MSR_IA32_ENERGY_PERF_BIAS, epb);
}
diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
index 54b9eef3eea9..85212a32b54d 100644
--- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
+++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
@@ -2610,9 +2610,10 @@ static int rdtgroup_init_alloc(struct rdtgroup *rdtgrp)
rdt_last_cmd_puts("Failed to initialize allocations\n");
return ret;
}
- rdtgrp->mode = RDT_MODE_SHAREABLE;
}
+ rdtgrp->mode = RDT_MODE_SHAREABLE;
+
return 0;
}
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 58ac7be52c7a..957eae13b370 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -426,6 +426,8 @@ static __always_inline void __speculation_ctrl_update(unsigned long tifp,
u64 msr = x86_spec_ctrl_base;
bool updmsr = false;
+ lockdep_assert_irqs_disabled();
+
/*
* If TIF_SSBD is different, select the proper mitigation
* method. Note that if SSBD mitigation is disabled or permanentely
@@ -477,10 +479,12 @@ static unsigned long speculation_ctrl_update_tif(struct task_struct *tsk)
void speculation_ctrl_update(unsigned long tif)
{
+ unsigned long flags;
+
/* Forced update. Make sure all relevant TIF flags are different */
- preempt_disable();
+ local_irq_save(flags);
__speculation_ctrl_update(~tif, tif);
- preempt_enable();
+ local_irq_restore(flags);
}
/* Called from seccomp/prctl update */
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 725624b6c0c0..8fd3cedd9acc 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -81,6 +81,19 @@ static int __init set_bios_reboot(const struct dmi_system_id *d)
return 0;
}
+/*
+ * Some machines don't handle the default ACPI reboot method and
+ * require the EFI reboot method:
+ */
+static int __init set_efi_reboot(const struct dmi_system_id *d)
+{
+ if (reboot_type != BOOT_EFI && !efi_runtime_disabled()) {
+ reboot_type = BOOT_EFI;
+ pr_info("%s series board detected. Selecting EFI-method for reboot.\n", d->ident);
+ }
+ return 0;
+}
+
void __noreturn machine_real_restart(unsigned int type)
{
local_irq_disable();
@@ -166,6 +179,14 @@ static const struct dmi_system_id reboot_dmi_table[] __initconst = {
DMI_MATCH(DMI_PRODUCT_NAME, "AOA110"),
},
},
+ { /* Handle reboot issue on Acer TravelMate X514-51T */
+ .callback = set_efi_reboot,
+ .ident = "Acer TravelMate X514-51T",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Acer"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "TravelMate X514-51T"),
+ },
+ },
/* Apple */
{ /* Handle problems with rebooting on Apple MacBook5 */
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index bad8c51fee6e..a5127b2c195f 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -362,7 +362,7 @@ SECTIONS
.bss : AT(ADDR(.bss) - LOAD_OFFSET) {
__bss_start = .;
*(.bss..page_aligned)
- *(.bss)
+ *(BSS_MAIN)
BSS_DECRYPTED
. = ALIGN(PAGE_SIZE);
__bss_stop = .;
diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c
index ee8f8ab46941..c0309ea9abee 100644
--- a/arch/x86/mm/dump_pagetables.c
+++ b/arch/x86/mm/dump_pagetables.c
@@ -259,7 +259,8 @@ static void note_wx(struct pg_state *st)
#endif
/* Account the WX pages */
st->wx_pages += npages;
- WARN_ONCE(1, "x86/mm: Found insecure W+X mapping at address %pS\n",
+ WARN_ONCE(__supported_pte_mask & _PAGE_NX,
+ "x86/mm: Found insecure W+X mapping at address %pS\n",
(void *)st->start_address);
}
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index 0029604af8a4..dd73d5d74393 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -825,7 +825,7 @@ void __init __early_set_fixmap(enum fixed_addresses idx,
pte = early_ioremap_pte(addr);
/* Sanitize 'prot' against any unsupported bits: */
- pgprot_val(flags) &= __default_kernel_pte_mask;
+ pgprot_val(flags) &= __supported_pte_mask;
if (pgprot_val(flags))
set_pte(pte, pfn_pte(phys >> PAGE_SHIFT, flags));
diff --git a/arch/x86/mm/kaslr.c b/arch/x86/mm/kaslr.c
index 3f452ffed7e9..d669c5e797e0 100644
--- a/arch/x86/mm/kaslr.c
+++ b/arch/x86/mm/kaslr.c
@@ -94,7 +94,7 @@ void __init kernel_randomize_memory(void)
if (!kaslr_memory_enabled())
return;
- kaslr_regions[0].size_tb = 1 << (__PHYSICAL_MASK_SHIFT - TB_SHIFT);
+ kaslr_regions[0].size_tb = 1 << (MAX_PHYSMEM_BITS - TB_SHIFT);
kaslr_regions[1].size_tb = VMALLOC_SIZE_TB;
/*
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index bc4bc7b2f075..487b8474c01c 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -728,7 +728,7 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
{
int cpu;
- struct flush_tlb_info info __aligned(SMP_CACHE_BYTES) = {
+ struct flush_tlb_info info = {
.mm = mm,
.stride_shift = stride_shift,
.freed_tables = freed_tables,
diff --git a/include/linux/efi.h b/include/linux/efi.h
index 54357a258b35..6ebc2098cfe1 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -1611,7 +1611,12 @@ efi_status_t efi_setup_gop(efi_system_table_t *sys_table_arg,
struct screen_info *si, efi_guid_t *proto,
unsigned long size);
-bool efi_runtime_disabled(void);
+#ifdef CONFIG_EFI
+extern bool efi_runtime_disabled(void);
+#else
+static inline bool efi_runtime_disabled(void) { return true; }
+#endif
+
extern void efi_call_virt_check_flags(unsigned long flags, const char *call);
extern unsigned long efi_call_virt_save_flags(void);
^ permalink raw reply related [flat|nested] 547+ messages in thread
* Re: [GIT PULL] x86 fixes
2019-04-12 13:10 Ingo Molnar
@ 2019-04-13 4:05 ` pr-tracker-bot
0 siblings, 0 replies; 547+ messages in thread
From: pr-tracker-bot @ 2019-04-13 4:05 UTC (permalink / raw)
To: Ingo Molnar
Cc: Linus Torvalds, linux-kernel, Thomas Gleixner, Borislav Petkov,
Peter Zijlstra, Andrew Morton
The pull request you sent on Fri, 12 Apr 2019 15:10:42 +0200:
> git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus
has been merged into torvalds/linux.git:
https://git.kernel.org/torvalds/c/6d0a598489ca687e1ebac37eef546526eca41347
Thank you!
--
Deet-doot-dot, I am a bot.
https://korg.wiki.kernel.org/userdoc/prtracker
^ permalink raw reply [flat|nested] 547+ messages in thread
* [GIT PULL] x86 fixes
@ 2019-04-12 13:10 Ingo Molnar
2019-04-13 4:05 ` pr-tracker-bot
0 siblings, 1 reply; 547+ messages in thread
From: Ingo Molnar @ 2019-04-12 13:10 UTC (permalink / raw)
To: Linus Torvalds
Cc: linux-kernel, Thomas Gleixner, Borislav Petkov, Peter Zijlstra,
Andrew Morton
Linus,
Please pull the latest x86-urgent-for-linus git tree from:
git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus
# HEAD: 5b77e95dd7790ff6c8fbf1cd8d0104ebed818a03 x86/asm: Use stricter assembly constraints in bitops
Fix typos in user-visible resctrl parameters, and also fix assembly
constraint bugs that might result in miscompilation.
Thanks,
Ingo
------------------>
Alexander Potapenko (1):
x86/asm: Use stricter assembly constraints in bitops
Xiaochen Shen (1):
x86/resctrl: Fix typos in the mba_sc mount option
arch/x86/include/asm/bitops.h | 41 +++++++++++++++-------------------
arch/x86/kernel/cpu/resctrl/rdtgroup.c | 6 ++---
2 files changed, 21 insertions(+), 26 deletions(-)
diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h
index d153d570bb04..8e790ec219a5 100644
--- a/arch/x86/include/asm/bitops.h
+++ b/arch/x86/include/asm/bitops.h
@@ -36,16 +36,17 @@
* bit 0 is the LSB of addr; bit 32 is the LSB of (addr+1).
*/
-#define BITOP_ADDR(x) "+m" (*(volatile long *) (x))
+#define RLONG_ADDR(x) "m" (*(volatile long *) (x))
+#define WBYTE_ADDR(x) "+m" (*(volatile char *) (x))
-#define ADDR BITOP_ADDR(addr)
+#define ADDR RLONG_ADDR(addr)
/*
* We do the locked ops that don't return the old value as
* a mask operation on a byte.
*/
#define IS_IMMEDIATE(nr) (__builtin_constant_p(nr))
-#define CONST_MASK_ADDR(nr, addr) BITOP_ADDR((void *)(addr) + ((nr)>>3))
+#define CONST_MASK_ADDR(nr, addr) WBYTE_ADDR((void *)(addr) + ((nr)>>3))
#define CONST_MASK(nr) (1 << ((nr) & 7))
/**
@@ -73,7 +74,7 @@ set_bit(long nr, volatile unsigned long *addr)
: "memory");
} else {
asm volatile(LOCK_PREFIX __ASM_SIZE(bts) " %1,%0"
- : BITOP_ADDR(addr) : "Ir" (nr) : "memory");
+ : : RLONG_ADDR(addr), "Ir" (nr) : "memory");
}
}
@@ -88,7 +89,7 @@ set_bit(long nr, volatile unsigned long *addr)
*/
static __always_inline void __set_bit(long nr, volatile unsigned long *addr)
{
- asm volatile(__ASM_SIZE(bts) " %1,%0" : ADDR : "Ir" (nr) : "memory");
+ asm volatile(__ASM_SIZE(bts) " %1,%0" : : ADDR, "Ir" (nr) : "memory");
}
/**
@@ -110,8 +111,7 @@ clear_bit(long nr, volatile unsigned long *addr)
: "iq" ((u8)~CONST_MASK(nr)));
} else {
asm volatile(LOCK_PREFIX __ASM_SIZE(btr) " %1,%0"
- : BITOP_ADDR(addr)
- : "Ir" (nr));
+ : : RLONG_ADDR(addr), "Ir" (nr) : "memory");
}
}
@@ -131,7 +131,7 @@ static __always_inline void clear_bit_unlock(long nr, volatile unsigned long *ad
static __always_inline void __clear_bit(long nr, volatile unsigned long *addr)
{
- asm volatile(__ASM_SIZE(btr) " %1,%0" : ADDR : "Ir" (nr));
+ asm volatile(__ASM_SIZE(btr) " %1,%0" : : ADDR, "Ir" (nr) : "memory");
}
static __always_inline bool clear_bit_unlock_is_negative_byte(long nr, volatile unsigned long *addr)
@@ -139,7 +139,7 @@ static __always_inline bool clear_bit_unlock_is_negative_byte(long nr, volatile
bool negative;
asm volatile(LOCK_PREFIX "andb %2,%1"
CC_SET(s)
- : CC_OUT(s) (negative), ADDR
+ : CC_OUT(s) (negative), WBYTE_ADDR(addr)
: "ir" ((char) ~(1 << nr)) : "memory");
return negative;
}
@@ -155,13 +155,9 @@ static __always_inline bool clear_bit_unlock_is_negative_byte(long nr, volatile
* __clear_bit() is non-atomic and implies release semantics before the memory
* operation. It can be used for an unlock if no other CPUs can concurrently
* modify other bits in the word.
- *
- * No memory barrier is required here, because x86 cannot reorder stores past
- * older loads. Same principle as spin_unlock.
*/
static __always_inline void __clear_bit_unlock(long nr, volatile unsigned long *addr)
{
- barrier();
__clear_bit(nr, addr);
}
@@ -176,7 +172,7 @@ static __always_inline void __clear_bit_unlock(long nr, volatile unsigned long *
*/
static __always_inline void __change_bit(long nr, volatile unsigned long *addr)
{
- asm volatile(__ASM_SIZE(btc) " %1,%0" : ADDR : "Ir" (nr));
+ asm volatile(__ASM_SIZE(btc) " %1,%0" : : ADDR, "Ir" (nr) : "memory");
}
/**
@@ -196,8 +192,7 @@ static __always_inline void change_bit(long nr, volatile unsigned long *addr)
: "iq" ((u8)CONST_MASK(nr)));
} else {
asm volatile(LOCK_PREFIX __ASM_SIZE(btc) " %1,%0"
- : BITOP_ADDR(addr)
- : "Ir" (nr));
+ : : RLONG_ADDR(addr), "Ir" (nr) : "memory");
}
}
@@ -242,8 +237,8 @@ static __always_inline bool __test_and_set_bit(long nr, volatile unsigned long *
asm(__ASM_SIZE(bts) " %2,%1"
CC_SET(c)
- : CC_OUT(c) (oldbit), ADDR
- : "Ir" (nr));
+ : CC_OUT(c) (oldbit)
+ : ADDR, "Ir" (nr) : "memory");
return oldbit;
}
@@ -282,8 +277,8 @@ static __always_inline bool __test_and_clear_bit(long nr, volatile unsigned long
asm volatile(__ASM_SIZE(btr) " %2,%1"
CC_SET(c)
- : CC_OUT(c) (oldbit), ADDR
- : "Ir" (nr));
+ : CC_OUT(c) (oldbit)
+ : ADDR, "Ir" (nr) : "memory");
return oldbit;
}
@@ -294,8 +289,8 @@ static __always_inline bool __test_and_change_bit(long nr, volatile unsigned lon
asm volatile(__ASM_SIZE(btc) " %2,%1"
CC_SET(c)
- : CC_OUT(c) (oldbit), ADDR
- : "Ir" (nr) : "memory");
+ : CC_OUT(c) (oldbit)
+ : ADDR, "Ir" (nr) : "memory");
return oldbit;
}
@@ -326,7 +321,7 @@ static __always_inline bool variable_test_bit(long nr, volatile const unsigned l
asm volatile(__ASM_SIZE(bt) " %2,%1"
CC_SET(c)
: CC_OUT(c) (oldbit)
- : "m" (*(unsigned long *)addr), "Ir" (nr));
+ : "m" (*(unsigned long *)addr), "Ir" (nr) : "memory");
return oldbit;
}
diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
index 399601eda8e4..54b9eef3eea9 100644
--- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
+++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
@@ -2039,14 +2039,14 @@ static int rdt_get_tree(struct fs_context *fc)
enum rdt_param {
Opt_cdp,
Opt_cdpl2,
- Opt_mba_mpbs,
+ Opt_mba_mbps,
nr__rdt_params
};
static const struct fs_parameter_spec rdt_param_specs[] = {
fsparam_flag("cdp", Opt_cdp),
fsparam_flag("cdpl2", Opt_cdpl2),
- fsparam_flag("mba_mpbs", Opt_mba_mpbs),
+ fsparam_flag("mba_MBps", Opt_mba_mbps),
{}
};
@@ -2072,7 +2072,7 @@ static int rdt_parse_param(struct fs_context *fc, struct fs_parameter *param)
case Opt_cdpl2:
ctx->enable_cdpl2 = true;
return 0;
- case Opt_mba_mpbs:
+ case Opt_mba_mbps:
if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
return -EINVAL;
ctx->enable_mba_mbps = true;
^ permalink raw reply related [flat|nested] 547+ messages in thread
* Re: [GIT PULL] x86 fixes
2019-02-17 10:19 Ingo Molnar
@ 2019-02-17 16:50 ` pr-tracker-bot
0 siblings, 0 replies; 547+ messages in thread
From: pr-tracker-bot @ 2019-02-17 16:50 UTC (permalink / raw)
To: Ingo Molnar
Cc: Linus Torvalds, linux-kernel, Thomas Gleixner, Borislav Petkov,
Peter Zijlstra, Andrew Morton
The pull request you sent on Sun, 17 Feb 2019 11:19:08 +0100:
> git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus
has been merged into torvalds/linux.git:
https://git.kernel.org/torvalds/c/8d33316d520501f24fef180ea5b860ecb9e64506
Thank you!
--
Deet-doot-dot, I am a bot.
https://korg.wiki.kernel.org/userdoc/prtracker
^ permalink raw reply [flat|nested] 547+ messages in thread
* [GIT PULL] x86 fixes
@ 2019-02-17 10:19 Ingo Molnar
2019-02-17 16:50 ` pr-tracker-bot
0 siblings, 1 reply; 547+ messages in thread
From: Ingo Molnar @ 2019-02-17 10:19 UTC (permalink / raw)
To: Linus Torvalds
Cc: linux-kernel, Thomas Gleixner, Borislav Petkov, Peter Zijlstra,
Andrew Morton
Linus,
Please pull the latest x86-urgent-for-linus git tree from:
git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus
# HEAD: f331e766c4be33f4338574f3c9f7f77e98ab4571 x86/platform/UV: Use efi_runtime_lock to serialise BIOS calls
Three changes:
- An UV fix/quirk to pull UV BIOS calls into the efi_runtime_lock
locking regime. (This done by aliasing __efi_uv_runtime_lock to
efi_runtime_lock, which should make the quirk nature obvious and
maintain the general policy that the EFI lock (name...) isn't exposed
to drivers.)
- Our version of MAGA: Make a.out Great Again.
- Add a new Intel model name enumerator to an upstream header to help
reduce dependencies going forward.
out-of-topic modifications in x86-urgent-for-linus:
-----------------------------------------------------
drivers/firmware/efi/runtime-wrappers.c# f331e766c4be: x86/platform/UV: Use efi_run
Thanks,
Ingo
------------------>
Borislav Petkov (1):
x86/a.out: Clear the dump structure initially
Hedi Berriche (1):
x86/platform/UV: Use efi_runtime_lock to serialise BIOS calls
Rajneesh Bhardwaj (1):
x86/CPU: Add Icelake model number
arch/x86/ia32/ia32_aout.c | 6 ++++--
arch/x86/include/asm/intel-family.h | 2 ++
arch/x86/include/asm/uv/bios.h | 8 +++++++-
arch/x86/platform/uv/bios_uv.c | 23 +++++++++++++++++++++--
drivers/firmware/efi/runtime-wrappers.c | 7 +++++++
5 files changed, 41 insertions(+), 5 deletions(-)
diff --git a/arch/x86/ia32/ia32_aout.c b/arch/x86/ia32/ia32_aout.c
index f65b78d32f5e..7dbbe9ffda17 100644
--- a/arch/x86/ia32/ia32_aout.c
+++ b/arch/x86/ia32/ia32_aout.c
@@ -51,7 +51,7 @@ static unsigned long get_dr(int n)
/*
* fill in the user structure for a core dump..
*/
-static void dump_thread32(struct pt_regs *regs, struct user32 *dump)
+static void fill_dump(struct pt_regs *regs, struct user32 *dump)
{
u32 fs, gs;
memset(dump, 0, sizeof(*dump));
@@ -157,10 +157,12 @@ static int aout_core_dump(struct coredump_params *cprm)
fs = get_fs();
set_fs(KERNEL_DS);
has_dumped = 1;
+
+ fill_dump(cprm->regs, &dump);
+
strncpy(dump.u_comm, current->comm, sizeof(current->comm));
dump.u_ar0 = offsetof(struct user32, regs);
dump.signal = cprm->siginfo->si_signo;
- dump_thread32(cprm->regs, &dump);
/*
* If the size of the dump file exceeds the rlimit, then see
diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h
index d9a9993af882..9f15384c504a 100644
--- a/arch/x86/include/asm/intel-family.h
+++ b/arch/x86/include/asm/intel-family.h
@@ -52,6 +52,8 @@
#define INTEL_FAM6_CANNONLAKE_MOBILE 0x66
+#define INTEL_FAM6_ICELAKE_MOBILE 0x7E
+
/* "Small Core" Processors (Atom) */
#define INTEL_FAM6_ATOM_BONNELL 0x1C /* Diamondville, Pineview */
diff --git a/arch/x86/include/asm/uv/bios.h b/arch/x86/include/asm/uv/bios.h
index e652a7cc6186..3f697a9e3f59 100644
--- a/arch/x86/include/asm/uv/bios.h
+++ b/arch/x86/include/asm/uv/bios.h
@@ -48,7 +48,8 @@ enum {
BIOS_STATUS_SUCCESS = 0,
BIOS_STATUS_UNIMPLEMENTED = -ENOSYS,
BIOS_STATUS_EINVAL = -EINVAL,
- BIOS_STATUS_UNAVAIL = -EBUSY
+ BIOS_STATUS_UNAVAIL = -EBUSY,
+ BIOS_STATUS_ABORT = -EINTR,
};
/* Address map parameters */
@@ -167,4 +168,9 @@ extern long system_serial_number;
extern struct kobject *sgi_uv_kobj; /* /sys/firmware/sgi_uv */
+/*
+ * EFI runtime lock; cf. firmware/efi/runtime-wrappers.c for details
+ */
+extern struct semaphore __efi_uv_runtime_lock;
+
#endif /* _ASM_X86_UV_BIOS_H */
diff --git a/arch/x86/platform/uv/bios_uv.c b/arch/x86/platform/uv/bios_uv.c
index 4a6a5a26c582..eb33432f2f24 100644
--- a/arch/x86/platform/uv/bios_uv.c
+++ b/arch/x86/platform/uv/bios_uv.c
@@ -29,7 +29,8 @@
struct uv_systab *uv_systab;
-s64 uv_bios_call(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, u64 a4, u64 a5)
+static s64 __uv_bios_call(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3,
+ u64 a4, u64 a5)
{
struct uv_systab *tab = uv_systab;
s64 ret;
@@ -51,6 +52,19 @@ s64 uv_bios_call(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, u64 a4, u64 a5)
return ret;
}
+
+s64 uv_bios_call(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, u64 a4, u64 a5)
+{
+ s64 ret;
+
+ if (down_interruptible(&__efi_uv_runtime_lock))
+ return BIOS_STATUS_ABORT;
+
+ ret = __uv_bios_call(which, a1, a2, a3, a4, a5);
+ up(&__efi_uv_runtime_lock);
+
+ return ret;
+}
EXPORT_SYMBOL_GPL(uv_bios_call);
s64 uv_bios_call_irqsave(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3,
@@ -59,10 +73,15 @@ s64 uv_bios_call_irqsave(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3,
unsigned long bios_flags;
s64 ret;
+ if (down_interruptible(&__efi_uv_runtime_lock))
+ return BIOS_STATUS_ABORT;
+
local_irq_save(bios_flags);
- ret = uv_bios_call(which, a1, a2, a3, a4, a5);
+ ret = __uv_bios_call(which, a1, a2, a3, a4, a5);
local_irq_restore(bios_flags);
+ up(&__efi_uv_runtime_lock);
+
return ret;
}
diff --git a/drivers/firmware/efi/runtime-wrappers.c b/drivers/firmware/efi/runtime-wrappers.c
index 8903b9ccfc2b..e2abfdb5cee6 100644
--- a/drivers/firmware/efi/runtime-wrappers.c
+++ b/drivers/firmware/efi/runtime-wrappers.c
@@ -146,6 +146,13 @@ void efi_call_virt_check_flags(unsigned long flags, const char *call)
*/
static DEFINE_SEMAPHORE(efi_runtime_lock);
+/*
+ * Expose the EFI runtime lock to the UV platform
+ */
+#ifdef CONFIG_X86_UV
+extern struct semaphore __efi_uv_runtime_lock __alias(efi_runtime_lock);
+#endif
+
/*
* Calls the appropriate efi_runtime_service() with the appropriate
* arguments.
^ permalink raw reply related [flat|nested] 547+ messages in thread
* Re: [GIT PULL] x86 fixes
2019-02-10 9:13 Ingo Molnar
@ 2019-02-10 18:30 ` pr-tracker-bot
0 siblings, 0 replies; 547+ messages in thread
From: pr-tracker-bot @ 2019-02-10 18:30 UTC (permalink / raw)
To: Ingo Molnar
Cc: Linus Torvalds, linux-kernel, Thomas Gleixner, Borislav Petkov,
Peter Zijlstra, Andrew Morton
The pull request you sent on Sun, 10 Feb 2019 10:13:40 +0100:
> git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus
has been merged into torvalds/linux.git:
https://git.kernel.org/torvalds/c/aadaa8061189a9e5d8a1327b328453d663e8cbc9
Thank you!
--
Deet-doot-dot, I am a bot.
https://korg.wiki.kernel.org/userdoc/prtracker
^ permalink raw reply [flat|nested] 547+ messages in thread
* [GIT PULL] x86 fixes
@ 2019-02-10 9:13 Ingo Molnar
2019-02-10 18:30 ` pr-tracker-bot
0 siblings, 1 reply; 547+ messages in thread
From: Ingo Molnar @ 2019-02-10 9:13 UTC (permalink / raw)
To: Linus Torvalds
Cc: linux-kernel, Thomas Gleixner, Borislav Petkov, Peter Zijlstra,
Andrew Morton
Linus,
Please pull the latest x86-urgent-for-linus git tree from:
git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus
# HEAD: 20e55bc17dd01f13cec0eb17e76e9511b23963ef x86/mm: Make set_pmd_at() paravirt aware
A handful of fixes:
- Fix an MCE corner case bug/crash found via MCE injection testing
- Fix 5-level paging boot crash
- Fix MCE recovery cache invalidation bug
- Fix regression on Xen guests caused by a recent PMD level mremap
speedup optimization
Thanks,
Ingo
------------------>
Juergen Gross (1):
x86/mm: Make set_pmd_at() paravirt aware
Kirill A. Shutemov (1):
x86/boot/compressed/64: Do not corrupt EDX on EFER.LME=1 setting
Peter Zijlstra (1):
x86/mm/cpa: Fix set_mce_nospec()
Tony Luck (1):
x86/MCE: Initialize mce.bank in the case of a fatal error in mce_no_way_out()
arch/x86/boot/compressed/head_64.S | 2 ++
arch/x86/include/asm/pgtable.h | 2 +-
arch/x86/kernel/cpu/mce/core.c | 1 +
arch/x86/mm/pageattr.c | 50 +++++++++++++++++++-------------------
4 files changed, 29 insertions(+), 26 deletions(-)
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index f105ae8651c9..f62e347862cc 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -602,10 +602,12 @@ ENTRY(trampoline_32bit_src)
3:
/* Set EFER.LME=1 as a precaution in case hypervsior pulls the rug */
pushl %ecx
+ pushl %edx
movl $MSR_EFER, %ecx
rdmsr
btsl $_EFER_LME, %eax
wrmsr
+ popl %edx
popl %ecx
/* Enable PAE and LA57 (if required) paging modes */
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 40616e805292..2779ace16d23 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -1065,7 +1065,7 @@ static inline void native_set_pte_at(struct mm_struct *mm, unsigned long addr,
static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
pmd_t *pmdp, pmd_t pmd)
{
- native_set_pmd(pmdp, pmd);
+ set_pmd(pmdp, pmd);
}
static inline void set_pud_at(struct mm_struct *mm, unsigned long addr,
diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index 672c7225cb1b..6ce290c506d9 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -784,6 +784,7 @@ static int mce_no_way_out(struct mce *m, char **msg, unsigned long *validp,
quirk_no_way_out(i, m, regs);
if (mce_severity(m, mca_cfg.tolerant, &tmp, true) >= MCE_PANIC_SEVERITY) {
+ m->bank = i;
mce_read_aux(m, i);
*msg = tmp;
return 1;
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 4f8972311a77..14e6119838a6 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -230,6 +230,29 @@ static bool __cpa_pfn_in_highmap(unsigned long pfn)
#endif
+/*
+ * See set_mce_nospec().
+ *
+ * Machine check recovery code needs to change cache mode of poisoned pages to
+ * UC to avoid speculative access logging another error. But passing the
+ * address of the 1:1 mapping to set_memory_uc() is a fine way to encourage a
+ * speculative access. So we cheat and flip the top bit of the address. This
+ * works fine for the code that updates the page tables. But at the end of the
+ * process we need to flush the TLB and cache and the non-canonical address
+ * causes a #GP fault when used by the INVLPG and CLFLUSH instructions.
+ *
+ * But in the common case we already have a canonical address. This code
+ * will fix the top bit if needed and is a no-op otherwise.
+ */
+static inline unsigned long fix_addr(unsigned long addr)
+{
+#ifdef CONFIG_X86_64
+ return (long)(addr << 1) >> 1;
+#else
+ return addr;
+#endif
+}
+
static unsigned long __cpa_addr(struct cpa_data *cpa, unsigned long idx)
{
if (cpa->flags & CPA_PAGES_ARRAY) {
@@ -313,7 +336,7 @@ void __cpa_flush_tlb(void *data)
unsigned int i;
for (i = 0; i < cpa->numpages; i++)
- __flush_tlb_one_kernel(__cpa_addr(cpa, i));
+ __flush_tlb_one_kernel(fix_addr(__cpa_addr(cpa, i)));
}
static void cpa_flush(struct cpa_data *data, int cache)
@@ -347,7 +370,7 @@ static void cpa_flush(struct cpa_data *data, int cache)
* Only flush present addresses:
*/
if (pte && (pte_val(*pte) & _PAGE_PRESENT))
- clflush_cache_range_opt((void *)addr, PAGE_SIZE);
+ clflush_cache_range_opt((void *)fix_addr(addr), PAGE_SIZE);
}
mb();
}
@@ -1627,29 +1650,6 @@ static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias)
return ret;
}
-/*
- * Machine check recovery code needs to change cache mode of poisoned
- * pages to UC to avoid speculative access logging another error. But
- * passing the address of the 1:1 mapping to set_memory_uc() is a fine
- * way to encourage a speculative access. So we cheat and flip the top
- * bit of the address. This works fine for the code that updates the
- * page tables. But at the end of the process we need to flush the cache
- * and the non-canonical address causes a #GP fault when used by the
- * CLFLUSH instruction.
- *
- * But in the common case we already have a canonical address. This code
- * will fix the top bit if needed and is a no-op otherwise.
- */
-static inline unsigned long make_addr_canonical_again(unsigned long addr)
-{
-#ifdef CONFIG_X86_64
- return (long)(addr << 1) >> 1;
-#else
- return addr;
-#endif
-}
-
-
static int change_page_attr_set_clr(unsigned long *addr, int numpages,
pgprot_t mask_set, pgprot_t mask_clr,
int force_split, int in_flag,
^ permalink raw reply related [flat|nested] 547+ messages in thread
* Re: [GIT PULL] x86 fixes
2019-01-11 7:14 Ingo Molnar
@ 2019-01-11 18:00 ` pr-tracker-bot
0 siblings, 0 replies; 547+ messages in thread
From: pr-tracker-bot @ 2019-01-11 18:00 UTC (permalink / raw)
To: Ingo Molnar
Cc: Linus Torvalds, linux-kernel, Thomas Gleixner, Borislav Petkov,
Peter Zijlstra, Andrew Morton
The pull request you sent on Fri, 11 Jan 2019 08:14:54 +0100:
> git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus
has been merged into torvalds/linux.git:
https://git.kernel.org/torvalds/c/e8af37f3f488e7adce2b5c6f6dfe8c83c2662e1f
Thank you!
--
Deet-doot-dot, I am a bot.
https://korg.wiki.kernel.org/userdoc/prtracker
^ permalink raw reply [flat|nested] 547+ messages in thread
* [GIT PULL] x86 fixes
@ 2019-01-11 7:14 Ingo Molnar
2019-01-11 18:00 ` pr-tracker-bot
0 siblings, 1 reply; 547+ messages in thread
From: Ingo Molnar @ 2019-01-11 7:14 UTC (permalink / raw)
To: Linus Torvalds
Cc: linux-kernel, Thomas Gleixner, Borislav Petkov, Peter Zijlstra,
Andrew Morton
Linus,
Please pull the latest x86-urgent-for-linus git tree from:
git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus
# HEAD: e4f358916d528d479c3c12bd2fd03f2d5a576380 x86, modpost: Replace last remnants of RETPOLINE with CONFIG_RETPOLINE
A 32-bit build fix, CONFIG_RETPOLINE fixes and rename CONFIG_RESCTRL to
CONFIG_X86_RESCTRL.
out-of-topic modifications in x86-urgent-for-linus:
-----------------------------------------------------
include/linux/module.h # e4f358916d52: x86, modpost: Replace last r
samples/seccomp/Makefile # a77d1d196bc6: samples/seccomp: Fix 32-bit
scripts/mod/modpost.c # e4f358916d52: x86, modpost: Replace last r
Thanks,
Ingo
------------------>
Borislav Petkov (1):
x86/cache: Rename config option to CONFIG_X86_RESCTRL
Tycho Andersen (1):
samples/seccomp: Fix 32-bit build
WANG Chao (1):
x86, modpost: Replace last remnants of RETPOLINE with CONFIG_RETPOLINE
Documentation/x86/resctrl_ui.txt | 2 +-
arch/x86/Kconfig | 2 +-
arch/x86/include/asm/resctrl_sched.h | 4 ++--
arch/x86/kernel/cpu/Makefile | 2 +-
arch/x86/kernel/cpu/bugs.c | 2 +-
arch/x86/kernel/cpu/resctrl/Makefile | 4 ++--
include/linux/compiler-gcc.h | 2 +-
include/linux/module.h | 2 +-
include/linux/sched.h | 2 +-
samples/seccomp/Makefile | 1 +
scripts/mod/modpost.c | 2 +-
11 files changed, 13 insertions(+), 12 deletions(-)
diff --git a/Documentation/x86/resctrl_ui.txt b/Documentation/x86/resctrl_ui.txt
index d9aed8303984..e8e8d14d3c4e 100644
--- a/Documentation/x86/resctrl_ui.txt
+++ b/Documentation/x86/resctrl_ui.txt
@@ -9,7 +9,7 @@ Fenghua Yu <fenghua.yu@intel.com>
Tony Luck <tony.luck@intel.com>
Vikas Shivappa <vikas.shivappa@intel.com>
-This feature is enabled by the CONFIG_RESCTRL and the X86 /proc/cpuinfo
+This feature is enabled by the CONFIG_X86_RESCTRL and the x86 /proc/cpuinfo
flag bits:
RDT (Resource Director Technology) Allocation - "rdt_a"
CAT (Cache Allocation Technology) - "cat_l3", "cat_l2"
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 6185d4f33296..15af091611e2 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -446,7 +446,7 @@ config RETPOLINE
branches. Requires a compiler with -mindirect-branch=thunk-extern
support for full protection. The kernel may run slower.
-config RESCTRL
+config X86_RESCTRL
bool "Resource Control support"
depends on X86 && (CPU_SUP_INTEL || CPU_SUP_AMD)
select KERNFS
diff --git a/arch/x86/include/asm/resctrl_sched.h b/arch/x86/include/asm/resctrl_sched.h
index 54990fe2a3ae..40ebddde6ac2 100644
--- a/arch/x86/include/asm/resctrl_sched.h
+++ b/arch/x86/include/asm/resctrl_sched.h
@@ -2,7 +2,7 @@
#ifndef _ASM_X86_RESCTRL_SCHED_H
#define _ASM_X86_RESCTRL_SCHED_H
-#ifdef CONFIG_RESCTRL
+#ifdef CONFIG_X86_RESCTRL
#include <linux/sched.h>
#include <linux/jump_label.h>
@@ -88,6 +88,6 @@ static inline void resctrl_sched_in(void)
static inline void resctrl_sched_in(void) {}
-#endif /* CONFIG_RESCTRL */
+#endif /* CONFIG_X86_RESCTRL */
#endif /* _ASM_X86_RESCTRL_SCHED_H */
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index ac78f90aea56..b6fa0869f7aa 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -39,7 +39,7 @@ obj-$(CONFIG_CPU_SUP_UMC_32) += umc.o
obj-$(CONFIG_X86_MCE) += mce/
obj-$(CONFIG_MTRR) += mtrr/
obj-$(CONFIG_MICROCODE) += microcode/
-obj-$(CONFIG_RESCTRL) += resctrl/
+obj-$(CONFIG_X86_RESCTRL) += resctrl/
obj-$(CONFIG_X86_LOCAL_APIC) += perfctr-watchdog.o
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 8654b8b0c848..1de0f4170178 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -215,7 +215,7 @@ static enum spectre_v2_mitigation spectre_v2_enabled __ro_after_init =
static enum spectre_v2_user_mitigation spectre_v2_user __ro_after_init =
SPECTRE_V2_USER_NONE;
-#ifdef RETPOLINE
+#ifdef CONFIG_RETPOLINE
static bool spectre_v2_bad_module;
bool retpoline_module_ok(bool has_retpoline)
diff --git a/arch/x86/kernel/cpu/resctrl/Makefile b/arch/x86/kernel/cpu/resctrl/Makefile
index 6895049ceef7..1cabe6fd8e11 100644
--- a/arch/x86/kernel/cpu/resctrl/Makefile
+++ b/arch/x86/kernel/cpu/resctrl/Makefile
@@ -1,4 +1,4 @@
# SPDX-License-Identifier: GPL-2.0
-obj-$(CONFIG_RESCTRL) += core.o rdtgroup.o monitor.o
-obj-$(CONFIG_RESCTRL) += ctrlmondata.o pseudo_lock.o
+obj-$(CONFIG_X86_RESCTRL) += core.o rdtgroup.o monitor.o
+obj-$(CONFIG_X86_RESCTRL) += ctrlmondata.o pseudo_lock.o
CFLAGS_pseudo_lock.o = -I$(src)
diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
index 5776da43da97..dd8268f5f5f0 100644
--- a/include/linux/compiler-gcc.h
+++ b/include/linux/compiler-gcc.h
@@ -68,7 +68,7 @@
*/
#define uninitialized_var(x) x = x
-#ifdef RETPOLINE
+#ifdef CONFIG_RETPOLINE
#define __noretpoline __attribute__((__indirect_branch__("keep")))
#endif
diff --git a/include/linux/module.h b/include/linux/module.h
index 9a21fe3509af..8fa38d3e7538 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -828,7 +828,7 @@ static inline void module_bug_finalize(const Elf_Ehdr *hdr,
static inline void module_bug_cleanup(struct module *mod) {}
#endif /* CONFIG_GENERIC_BUG */
-#ifdef RETPOLINE
+#ifdef CONFIG_RETPOLINE
extern bool retpoline_module_ok(bool has_retpoline);
#else
static inline bool retpoline_module_ok(bool has_retpoline)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 89541d248893..224666226e87 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -995,7 +995,7 @@ struct task_struct {
/* cg_list protected by css_set_lock and tsk->alloc_lock: */
struct list_head cg_list;
#endif
-#ifdef CONFIG_RESCTRL
+#ifdef CONFIG_X86_RESCTRL
u32 closid;
u32 rmid;
#endif
diff --git a/samples/seccomp/Makefile b/samples/seccomp/Makefile
index 4920903c8009..fb43a814d4c0 100644
--- a/samples/seccomp/Makefile
+++ b/samples/seccomp/Makefile
@@ -34,6 +34,7 @@ HOSTCFLAGS_bpf-direct.o += $(MFLAG)
HOSTCFLAGS_dropper.o += $(MFLAG)
HOSTCFLAGS_bpf-helper.o += $(MFLAG)
HOSTCFLAGS_bpf-fancy.o += $(MFLAG)
+HOSTCFLAGS_user-trap.o += $(MFLAG)
HOSTLDLIBS_bpf-direct += $(MFLAG)
HOSTLDLIBS_bpf-fancy += $(MFLAG)
HOSTLDLIBS_dropper += $(MFLAG)
diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c
index 0de2fb236640..26bf886bd168 100644
--- a/scripts/mod/modpost.c
+++ b/scripts/mod/modpost.c
@@ -2185,7 +2185,7 @@ static void add_intree_flag(struct buffer *b, int is_intree)
/* Cannot check for assembler */
static void add_retpoline(struct buffer *b)
{
- buf_printf(b, "\n#ifdef RETPOLINE\n");
+ buf_printf(b, "\n#ifdef CONFIG_RETPOLINE\n");
buf_printf(b, "MODULE_INFO(retpoline, \"Y\");\n");
buf_printf(b, "#endif\n");
}
^ permalink raw reply related [flat|nested] 547+ messages in thread
* Re: [GIT PULL] x86 fixes
2018-12-21 12:25 Ingo Molnar
@ 2018-12-21 19:30 ` pr-tracker-bot
0 siblings, 0 replies; 547+ messages in thread
From: pr-tracker-bot @ 2018-12-21 19:30 UTC (permalink / raw)
To: Ingo Molnar
Cc: Linus Torvalds, linux-kernel, Thomas Gleixner, Borislav Petkov,
Peter Zijlstra, Andrew Morton, Masahiro Yamada
The pull request you sent on Fri, 21 Dec 2018 13:25:45 +0100:
> git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus
has been merged into torvalds/linux.git:
https://git.kernel.org/torvalds/c/70ad6368e878857db315788dab36817aa992c86a
Thank you!
--
Deet-doot-dot, I am a bot.
https://korg.wiki.kernel.org/userdoc/prtracker
^ permalink raw reply [flat|nested] 547+ messages in thread
* [GIT PULL] x86 fixes
@ 2018-12-21 12:25 Ingo Molnar
2018-12-21 19:30 ` pr-tracker-bot
0 siblings, 1 reply; 547+ messages in thread
From: Ingo Molnar @ 2018-12-21 12:25 UTC (permalink / raw)
To: Linus Torvalds
Cc: linux-kernel, Thomas Gleixner, Borislav Petkov, Peter Zijlstra,
Andrew Morton, Masahiro Yamada
Linus,
Please pull the latest x86-urgent-for-linus git tree from:
git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus
# HEAD: 6ac389346e6964e1f6a1c675cebf8bd0912526a5 Revert "kbuild/Makefile: Prepare for using macros in inline assembly code to work around asm() related GCC inlining bugs"
The biggest part is a series of reverts for the macro based GCC inlining
workarounds. It caused regressions in distro build and other kernel
tooling environments, and the GCC project was very receptive to fixing
the underlying inliner weaknesses - so as time ran out we decided to do a
reasonably straightforward revert of the patches. The plan is to rely on
the 'asm inline' GCC 9 feature, which might be backported to GCC 8 and
could thus become reasonably widely available on modern distros.
Other than those reverts, there's misc fixes from all around the place.
I wish our final x86 pull request for v4.20 was smaller...
out-of-topic modifications in x86-urgent-for-linus:
-----------------------------------------------------
Makefile # 6ac389346e69: Revert "kbuild/Makefile: Pre
include/asm-generic/bug.h # ffb61c6346d0: Revert "x86/bug: Macrofy the
include/linux/compiler.h # 96af6cd02a10: Revert "x86/objtool: Use asm
scripts/Kbuild.include # 6ac389346e69: Revert "kbuild/Makefile: Pre
scripts/mod/Makefile # 6ac389346e69: Revert "kbuild/Makefile: Pre
Thanks,
Ingo
------------------>
Alistair Strachan (1):
x86/vdso: Pass --eh-frame-hdr to the linker
Chang S. Bae (1):
x86/fsgsbase/64: Fix the base write helper functions
Colin Ian King (1):
x86/mtrr: Don't copy uninitialized gentry fields back to userspace
Dan Williams (1):
x86/mm: Fix decoy address handling vs 32-bit builds
Ingo Molnar (9):
Revert "x86/jump-labels: Macrofy inline assembly code to work around GCC inlining bugs"
Revert "x86/cpufeature: Macrofy inline assembly code to work around GCC inlining bugs"
Revert "x86/extable: Macrofy inline assembly code to work around GCC inlining bugs"
Revert "x86/paravirt: Work around GCC inlining bugs when compiling paravirt ops"
Revert "x86/bug: Macrofy the BUG table section handling, to work around GCC inlining bugs"
Revert "x86/alternatives: Macrofy lock prefixes to work around GCC inlining bugs"
Revert "x86/refcount: Work around GCC inlining bug"
Revert "x86/objtool: Use asm macros to work around GCC inlining bugs"
Revert "kbuild/Makefile: Prepare for using macros in inline assembly code to work around asm() related GCC inlining bugs"
Kirill A. Shutemov (2):
x86/mm: Fix guard hole handling
x86/dump_pagetables: Fix LDT remap address marker
Peter Zijlstra (1):
x86/mm/cpa: Fix cpa_flush_array() TLB invalidation
Reinette Chatre (1):
x86/intel_rdt: Ensure a CPU remains online for the region's pseudo-locking sequence
Makefile | 9 +--
arch/x86/Makefile | 7 --
arch/x86/entry/calling.h | 2 +-
arch/x86/entry/vdso/Makefile | 3 +-
arch/x86/include/asm/alternative-asm.h | 20 ++----
arch/x86/include/asm/alternative.h | 11 +++-
arch/x86/include/asm/asm.h | 53 +++++++++------
arch/x86/include/asm/bug.h | 98 ++++++++++++----------------
arch/x86/include/asm/cpufeature.h | 82 ++++++++++--------------
arch/x86/include/asm/fsgsbase.h | 15 +++--
arch/x86/include/asm/jump_label.h | 72 +++++++++++++++------
arch/x86/include/asm/paravirt_types.h | 56 ++++++++--------
arch/x86/include/asm/pgtable_64_types.h | 5 ++
arch/x86/include/asm/refcount.h | 81 ++++++++++-------------
arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c | 4 ++
arch/x86/kernel/cpu/mtrr/if.c | 2 +
arch/x86/kernel/macros.S | 16 -----
arch/x86/kernel/process_64.c | 99 ++++++++++++++++-------------
arch/x86/kernel/ptrace.c | 9 +--
arch/x86/mm/dump_pagetables.c | 15 ++---
arch/x86/mm/pageattr.c | 24 ++++---
arch/x86/mm/pat.c | 13 +++-
arch/x86/xen/mmu_pv.c | 11 ++--
include/asm-generic/bug.h | 8 +--
include/linux/compiler.h | 56 ++++------------
scripts/Kbuild.include | 4 +-
scripts/mod/Makefile | 2 -
27 files changed, 385 insertions(+), 392 deletions(-)
delete mode 100644 arch/x86/kernel/macros.S
diff --git a/Makefile b/Makefile
index e9fd22c8445e..da4bb1e10388 100644
--- a/Makefile
+++ b/Makefile
@@ -1081,7 +1081,7 @@ scripts: scripts_basic scripts_dtc asm-generic gcc-plugins $(autoksyms_h)
# version.h and scripts_basic is processed / created.
# Listed in dependency order
-PHONY += prepare archprepare macroprepare prepare0 prepare1 prepare2 prepare3
+PHONY += prepare archprepare prepare0 prepare1 prepare2 prepare3
# prepare3 is used to check if we are building in a separate output directory,
# and if so do:
@@ -1104,9 +1104,7 @@ prepare2: prepare3 outputmakefile asm-generic
prepare1: prepare2 $(version_h) $(autoksyms_h) include/generated/utsrelease.h
$(cmd_crmodverdir)
-macroprepare: prepare1 archmacros
-
-archprepare: archheaders archscripts macroprepare scripts_basic
+archprepare: archheaders archscripts prepare1 scripts_basic
prepare0: archprepare gcc-plugins
$(Q)$(MAKE) $(build)=.
@@ -1174,9 +1172,6 @@ archheaders:
PHONY += archscripts
archscripts:
-PHONY += archmacros
-archmacros:
-
PHONY += __headers
__headers: $(version_h) scripts_basic uapi-asm-generic archheaders archscripts
$(Q)$(MAKE) $(build)=scripts build_unifdef
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 75ef499a66e2..85a66c4a8b65 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -232,13 +232,6 @@ archscripts: scripts_basic
archheaders:
$(Q)$(MAKE) $(build)=arch/x86/entry/syscalls all
-archmacros:
- $(Q)$(MAKE) $(build)=arch/x86/kernel arch/x86/kernel/macros.s
-
-ASM_MACRO_FLAGS = -Wa,arch/x86/kernel/macros.s
-export ASM_MACRO_FLAGS
-KBUILD_CFLAGS += $(ASM_MACRO_FLAGS)
-
###
# Kernel objects
diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h
index 25e5a6bda8c3..20d0885b00fb 100644
--- a/arch/x86/entry/calling.h
+++ b/arch/x86/entry/calling.h
@@ -352,7 +352,7 @@ For 32-bit we have the following conventions - kernel is built with
.macro CALL_enter_from_user_mode
#ifdef CONFIG_CONTEXT_TRACKING
#ifdef HAVE_JUMP_LABEL
- STATIC_BRANCH_JMP l_yes=.Lafter_call_\@, key=context_tracking_enabled, branch=1
+ STATIC_JUMP_IF_FALSE .Lafter_call_\@, context_tracking_enabled, def=0
#endif
call enter_from_user_mode
.Lafter_call_\@:
diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile
index 0624bf2266fd..5bfe2243a08f 100644
--- a/arch/x86/entry/vdso/Makefile
+++ b/arch/x86/entry/vdso/Makefile
@@ -171,7 +171,8 @@ quiet_cmd_vdso = VDSO $@
sh $(srctree)/$(src)/checkundef.sh '$(NM)' '$@'
VDSO_LDFLAGS = -shared $(call ld-option, --hash-style=both) \
- $(call ld-option, --build-id) -Bsymbolic
+ $(call ld-option, --build-id) $(call ld-option, --eh-frame-hdr) \
+ -Bsymbolic
GCOV_PROFILE := n
#
diff --git a/arch/x86/include/asm/alternative-asm.h b/arch/x86/include/asm/alternative-asm.h
index 8e4ea39e55d0..31b627b43a8e 100644
--- a/arch/x86/include/asm/alternative-asm.h
+++ b/arch/x86/include/asm/alternative-asm.h
@@ -7,24 +7,16 @@
#include <asm/asm.h>
#ifdef CONFIG_SMP
-.macro LOCK_PREFIX_HERE
+ .macro LOCK_PREFIX
+672: lock
.pushsection .smp_locks,"a"
.balign 4
- .long 671f - . # offset
+ .long 672b - .
.popsection
-671:
-.endm
-
-.macro LOCK_PREFIX insn:vararg
- LOCK_PREFIX_HERE
- lock \insn
-.endm
+ .endm
#else
-.macro LOCK_PREFIX_HERE
-.endm
-
-.macro LOCK_PREFIX insn:vararg
-.endm
+ .macro LOCK_PREFIX
+ .endm
#endif
/*
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index d7faa16622d8..4cd6a3b71824 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -31,8 +31,15 @@
*/
#ifdef CONFIG_SMP
-#define LOCK_PREFIX_HERE "LOCK_PREFIX_HERE\n\t"
-#define LOCK_PREFIX "LOCK_PREFIX "
+#define LOCK_PREFIX_HERE \
+ ".pushsection .smp_locks,\"a\"\n" \
+ ".balign 4\n" \
+ ".long 671f - .\n" /* offset */ \
+ ".popsection\n" \
+ "671:"
+
+#define LOCK_PREFIX LOCK_PREFIX_HERE "\n\tlock; "
+
#else /* ! CONFIG_SMP */
#define LOCK_PREFIX_HERE ""
#define LOCK_PREFIX ""
diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h
index 21b086786404..6467757bb39f 100644
--- a/arch/x86/include/asm/asm.h
+++ b/arch/x86/include/asm/asm.h
@@ -120,25 +120,12 @@
/* Exception table entry */
#ifdef __ASSEMBLY__
# define _ASM_EXTABLE_HANDLE(from, to, handler) \
- ASM_EXTABLE_HANDLE from to handler
-
-.macro ASM_EXTABLE_HANDLE from:req to:req handler:req
- .pushsection "__ex_table","a"
- .balign 4
- .long (\from) - .
- .long (\to) - .
- .long (\handler) - .
+ .pushsection "__ex_table","a" ; \
+ .balign 4 ; \
+ .long (from) - . ; \
+ .long (to) - . ; \
+ .long (handler) - . ; \
.popsection
-.endm
-#else /* __ASSEMBLY__ */
-
-# define _ASM_EXTABLE_HANDLE(from, to, handler) \
- "ASM_EXTABLE_HANDLE from=" #from " to=" #to \
- " handler=\"" #handler "\"\n\t"
-
-/* For C file, we already have NOKPROBE_SYMBOL macro */
-
-#endif /* __ASSEMBLY__ */
# define _ASM_EXTABLE(from, to) \
_ASM_EXTABLE_HANDLE(from, to, ex_handler_default)
@@ -161,7 +148,6 @@
_ASM_PTR (entry); \
.popsection
-#ifdef __ASSEMBLY__
.macro ALIGN_DESTINATION
/* check for bad alignment of destination */
movl %edi,%ecx
@@ -185,7 +171,34 @@
_ASM_EXTABLE_UA(100b, 103b)
_ASM_EXTABLE_UA(101b, 103b)
.endm
-#endif /* __ASSEMBLY__ */
+
+#else
+# define _EXPAND_EXTABLE_HANDLE(x) #x
+# define _ASM_EXTABLE_HANDLE(from, to, handler) \
+ " .pushsection \"__ex_table\",\"a\"\n" \
+ " .balign 4\n" \
+ " .long (" #from ") - .\n" \
+ " .long (" #to ") - .\n" \
+ " .long (" _EXPAND_EXTABLE_HANDLE(handler) ") - .\n" \
+ " .popsection\n"
+
+# define _ASM_EXTABLE(from, to) \
+ _ASM_EXTABLE_HANDLE(from, to, ex_handler_default)
+
+# define _ASM_EXTABLE_UA(from, to) \
+ _ASM_EXTABLE_HANDLE(from, to, ex_handler_uaccess)
+
+# define _ASM_EXTABLE_FAULT(from, to) \
+ _ASM_EXTABLE_HANDLE(from, to, ex_handler_fault)
+
+# define _ASM_EXTABLE_EX(from, to) \
+ _ASM_EXTABLE_HANDLE(from, to, ex_handler_ext)
+
+# define _ASM_EXTABLE_REFCOUNT(from, to) \
+ _ASM_EXTABLE_HANDLE(from, to, ex_handler_refcount)
+
+/* For C file, we already have NOKPROBE_SYMBOL macro */
+#endif
#ifndef __ASSEMBLY__
/*
diff --git a/arch/x86/include/asm/bug.h b/arch/x86/include/asm/bug.h
index 5090035e6d16..6804d6642767 100644
--- a/arch/x86/include/asm/bug.h
+++ b/arch/x86/include/asm/bug.h
@@ -4,8 +4,6 @@
#include <linux/stringify.h>
-#ifndef __ASSEMBLY__
-
/*
* Despite that some emulators terminate on UD2, we use it for WARN().
*
@@ -22,15 +20,53 @@
#define LEN_UD2 2
+#ifdef CONFIG_GENERIC_BUG
+
+#ifdef CONFIG_X86_32
+# define __BUG_REL(val) ".long " __stringify(val)
+#else
+# define __BUG_REL(val) ".long " __stringify(val) " - 2b"
+#endif
+
+#ifdef CONFIG_DEBUG_BUGVERBOSE
+
+#define _BUG_FLAGS(ins, flags) \
+do { \
+ asm volatile("1:\t" ins "\n" \
+ ".pushsection __bug_table,\"aw\"\n" \
+ "2:\t" __BUG_REL(1b) "\t# bug_entry::bug_addr\n" \
+ "\t" __BUG_REL(%c0) "\t# bug_entry::file\n" \
+ "\t.word %c1" "\t# bug_entry::line\n" \
+ "\t.word %c2" "\t# bug_entry::flags\n" \
+ "\t.org 2b+%c3\n" \
+ ".popsection" \
+ : : "i" (__FILE__), "i" (__LINE__), \
+ "i" (flags), \
+ "i" (sizeof(struct bug_entry))); \
+} while (0)
+
+#else /* !CONFIG_DEBUG_BUGVERBOSE */
+
#define _BUG_FLAGS(ins, flags) \
do { \
- asm volatile("ASM_BUG ins=\"" ins "\" file=%c0 line=%c1 " \
- "flags=%c2 size=%c3" \
- : : "i" (__FILE__), "i" (__LINE__), \
- "i" (flags), \
+ asm volatile("1:\t" ins "\n" \
+ ".pushsection __bug_table,\"aw\"\n" \
+ "2:\t" __BUG_REL(1b) "\t# bug_entry::bug_addr\n" \
+ "\t.word %c0" "\t# bug_entry::flags\n" \
+ "\t.org 2b+%c1\n" \
+ ".popsection" \
+ : : "i" (flags), \
"i" (sizeof(struct bug_entry))); \
} while (0)
+#endif /* CONFIG_DEBUG_BUGVERBOSE */
+
+#else
+
+#define _BUG_FLAGS(ins, flags) asm volatile(ins)
+
+#endif /* CONFIG_GENERIC_BUG */
+
#define HAVE_ARCH_BUG
#define BUG() \
do { \
@@ -46,54 +82,4 @@ do { \
#include <asm-generic/bug.h>
-#else /* __ASSEMBLY__ */
-
-#ifdef CONFIG_GENERIC_BUG
-
-#ifdef CONFIG_X86_32
-.macro __BUG_REL val:req
- .long \val
-.endm
-#else
-.macro __BUG_REL val:req
- .long \val - 2b
-.endm
-#endif
-
-#ifdef CONFIG_DEBUG_BUGVERBOSE
-
-.macro ASM_BUG ins:req file:req line:req flags:req size:req
-1: \ins
- .pushsection __bug_table,"aw"
-2: __BUG_REL val=1b # bug_entry::bug_addr
- __BUG_REL val=\file # bug_entry::file
- .word \line # bug_entry::line
- .word \flags # bug_entry::flags
- .org 2b+\size
- .popsection
-.endm
-
-#else /* !CONFIG_DEBUG_BUGVERBOSE */
-
-.macro ASM_BUG ins:req file:req line:req flags:req size:req
-1: \ins
- .pushsection __bug_table,"aw"
-2: __BUG_REL val=1b # bug_entry::bug_addr
- .word \flags # bug_entry::flags
- .org 2b+\size
- .popsection
-.endm
-
-#endif /* CONFIG_DEBUG_BUGVERBOSE */
-
-#else /* CONFIG_GENERIC_BUG */
-
-.macro ASM_BUG ins:req file:req line:req flags:req size:req
- \ins
-.endm
-
-#endif /* CONFIG_GENERIC_BUG */
-
-#endif /* __ASSEMBLY__ */
-
#endif /* _ASM_X86_BUG_H */
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 7d442722ef24..aced6c9290d6 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -2,10 +2,10 @@
#ifndef _ASM_X86_CPUFEATURE_H
#define _ASM_X86_CPUFEATURE_H
-#ifdef __KERNEL__
-#ifndef __ASSEMBLY__
-
#include <asm/processor.h>
+
+#if defined(__KERNEL__) && !defined(__ASSEMBLY__)
+
#include <asm/asm.h>
#include <linux/bitops.h>
@@ -161,10 +161,37 @@ extern void clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int bit);
*/
static __always_inline __pure bool _static_cpu_has(u16 bit)
{
- asm_volatile_goto("STATIC_CPU_HAS bitnum=%[bitnum] "
- "cap_byte=\"%[cap_byte]\" "
- "feature=%P[feature] t_yes=%l[t_yes] "
- "t_no=%l[t_no] always=%P[always]"
+ asm_volatile_goto("1: jmp 6f\n"
+ "2:\n"
+ ".skip -(((5f-4f) - (2b-1b)) > 0) * "
+ "((5f-4f) - (2b-1b)),0x90\n"
+ "3:\n"
+ ".section .altinstructions,\"a\"\n"
+ " .long 1b - .\n" /* src offset */
+ " .long 4f - .\n" /* repl offset */
+ " .word %P[always]\n" /* always replace */
+ " .byte 3b - 1b\n" /* src len */
+ " .byte 5f - 4f\n" /* repl len */
+ " .byte 3b - 2b\n" /* pad len */
+ ".previous\n"
+ ".section .altinstr_replacement,\"ax\"\n"
+ "4: jmp %l[t_no]\n"
+ "5:\n"
+ ".previous\n"
+ ".section .altinstructions,\"a\"\n"
+ " .long 1b - .\n" /* src offset */
+ " .long 0\n" /* no replacement */
+ " .word %P[feature]\n" /* feature bit */
+ " .byte 3b - 1b\n" /* src len */
+ " .byte 0\n" /* repl len */
+ " .byte 0\n" /* pad len */
+ ".previous\n"
+ ".section .altinstr_aux,\"ax\"\n"
+ "6:\n"
+ " testb %[bitnum],%[cap_byte]\n"
+ " jnz %l[t_yes]\n"
+ " jmp %l[t_no]\n"
+ ".previous\n"
: : [feature] "i" (bit),
[always] "i" (X86_FEATURE_ALWAYS),
[bitnum] "i" (1 << (bit & 7)),
@@ -199,44 +226,5 @@ static __always_inline __pure bool _static_cpu_has(u16 bit)
#define CPU_FEATURE_TYPEVAL boot_cpu_data.x86_vendor, boot_cpu_data.x86, \
boot_cpu_data.x86_model
-#else /* __ASSEMBLY__ */
-
-.macro STATIC_CPU_HAS bitnum:req cap_byte:req feature:req t_yes:req t_no:req always:req
-1:
- jmp 6f
-2:
- .skip -(((5f-4f) - (2b-1b)) > 0) * ((5f-4f) - (2b-1b)),0x90
-3:
- .section .altinstructions,"a"
- .long 1b - . /* src offset */
- .long 4f - . /* repl offset */
- .word \always /* always replace */
- .byte 3b - 1b /* src len */
- .byte 5f - 4f /* repl len */
- .byte 3b - 2b /* pad len */
- .previous
- .section .altinstr_replacement,"ax"
-4:
- jmp \t_no
-5:
- .previous
- .section .altinstructions,"a"
- .long 1b - . /* src offset */
- .long 0 /* no replacement */
- .word \feature /* feature bit */
- .byte 3b - 1b /* src len */
- .byte 0 /* repl len */
- .byte 0 /* pad len */
- .previous
- .section .altinstr_aux,"ax"
-6:
- testb \bitnum,\cap_byte
- jnz \t_yes
- jmp \t_no
- .previous
-.endm
-
-#endif /* __ASSEMBLY__ */
-
-#endif /* __KERNEL__ */
+#endif /* defined(__KERNEL__) && !defined(__ASSEMBLY__) */
#endif /* _ASM_X86_CPUFEATURE_H */
diff --git a/arch/x86/include/asm/fsgsbase.h b/arch/x86/include/asm/fsgsbase.h
index eb377b6e9eed..bca4c743de77 100644
--- a/arch/x86/include/asm/fsgsbase.h
+++ b/arch/x86/include/asm/fsgsbase.h
@@ -16,8 +16,8 @@
*/
extern unsigned long x86_fsbase_read_task(struct task_struct *task);
extern unsigned long x86_gsbase_read_task(struct task_struct *task);
-extern int x86_fsbase_write_task(struct task_struct *task, unsigned long fsbase);
-extern int x86_gsbase_write_task(struct task_struct *task, unsigned long gsbase);
+extern void x86_fsbase_write_task(struct task_struct *task, unsigned long fsbase);
+extern void x86_gsbase_write_task(struct task_struct *task, unsigned long gsbase);
/* Helper functions for reading/writing FS/GS base */
@@ -39,8 +39,15 @@ static inline unsigned long x86_gsbase_read_cpu_inactive(void)
return gsbase;
}
-extern void x86_fsbase_write_cpu(unsigned long fsbase);
-extern void x86_gsbase_write_cpu_inactive(unsigned long gsbase);
+static inline void x86_fsbase_write_cpu(unsigned long fsbase)
+{
+ wrmsrl(MSR_FS_BASE, fsbase);
+}
+
+static inline void x86_gsbase_write_cpu_inactive(unsigned long gsbase)
+{
+ wrmsrl(MSR_KERNEL_GS_BASE, gsbase);
+}
#endif /* CONFIG_X86_64 */
diff --git a/arch/x86/include/asm/jump_label.h b/arch/x86/include/asm/jump_label.h
index a5fb34fe56a4..21efc9d07ed9 100644
--- a/arch/x86/include/asm/jump_label.h
+++ b/arch/x86/include/asm/jump_label.h
@@ -2,6 +2,19 @@
#ifndef _ASM_X86_JUMP_LABEL_H
#define _ASM_X86_JUMP_LABEL_H
+#ifndef HAVE_JUMP_LABEL
+/*
+ * For better or for worse, if jump labels (the gcc extension) are missing,
+ * then the entire static branch patching infrastructure is compiled out.
+ * If that happens, the code in here will malfunction. Raise a compiler
+ * error instead.
+ *
+ * In theory, jump labels and the static branch patching infrastructure
+ * could be decoupled to fix this.
+ */
+#error asm/jump_label.h included on a non-jump-label kernel
+#endif
+
#define JUMP_LABEL_NOP_SIZE 5
#ifdef CONFIG_X86_64
@@ -20,9 +33,15 @@
static __always_inline bool arch_static_branch(struct static_key *key, bool branch)
{
- asm_volatile_goto("STATIC_BRANCH_NOP l_yes=\"%l[l_yes]\" key=\"%c0\" "
- "branch=\"%c1\""
- : : "i" (key), "i" (branch) : : l_yes);
+ asm_volatile_goto("1:"
+ ".byte " __stringify(STATIC_KEY_INIT_NOP) "\n\t"
+ ".pushsection __jump_table, \"aw\" \n\t"
+ _ASM_ALIGN "\n\t"
+ ".long 1b - ., %l[l_yes] - . \n\t"
+ _ASM_PTR "%c0 + %c1 - .\n\t"
+ ".popsection \n\t"
+ : : "i" (key), "i" (branch) : : l_yes);
+
return false;
l_yes:
return true;
@@ -30,8 +49,14 @@ static __always_inline bool arch_static_branch(struct static_key *key, bool bran
static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch)
{
- asm_volatile_goto("STATIC_BRANCH_JMP l_yes=\"%l[l_yes]\" key=\"%c0\" "
- "branch=\"%c1\""
+ asm_volatile_goto("1:"
+ ".byte 0xe9\n\t .long %l[l_yes] - 2f\n\t"
+ "2:\n\t"
+ ".pushsection __jump_table, \"aw\" \n\t"
+ _ASM_ALIGN "\n\t"
+ ".long 1b - ., %l[l_yes] - . \n\t"
+ _ASM_PTR "%c0 + %c1 - .\n\t"
+ ".popsection \n\t"
: : "i" (key), "i" (branch) : : l_yes);
return false;
@@ -41,26 +66,37 @@ static __always_inline bool arch_static_branch_jump(struct static_key *key, bool
#else /* __ASSEMBLY__ */
-.macro STATIC_BRANCH_NOP l_yes:req key:req branch:req
-.Lstatic_branch_nop_\@:
- .byte STATIC_KEY_INIT_NOP
-.Lstatic_branch_no_after_\@:
+.macro STATIC_JUMP_IF_TRUE target, key, def
+.Lstatic_jump_\@:
+ .if \def
+ /* Equivalent to "jmp.d32 \target" */
+ .byte 0xe9
+ .long \target - .Lstatic_jump_after_\@
+.Lstatic_jump_after_\@:
+ .else
+ .byte STATIC_KEY_INIT_NOP
+ .endif
.pushsection __jump_table, "aw"
_ASM_ALIGN
- .long .Lstatic_branch_nop_\@ - ., \l_yes - .
- _ASM_PTR \key + \branch - .
+ .long .Lstatic_jump_\@ - ., \target - .
+ _ASM_PTR \key - .
.popsection
.endm
-.macro STATIC_BRANCH_JMP l_yes:req key:req branch:req
-.Lstatic_branch_jmp_\@:
- .byte 0xe9
- .long \l_yes - .Lstatic_branch_jmp_after_\@
-.Lstatic_branch_jmp_after_\@:
+.macro STATIC_JUMP_IF_FALSE target, key, def
+.Lstatic_jump_\@:
+ .if \def
+ .byte STATIC_KEY_INIT_NOP
+ .else
+ /* Equivalent to "jmp.d32 \target" */
+ .byte 0xe9
+ .long \target - .Lstatic_jump_after_\@
+.Lstatic_jump_after_\@:
+ .endif
.pushsection __jump_table, "aw"
_ASM_ALIGN
- .long .Lstatic_branch_jmp_\@ - ., \l_yes - .
- _ASM_PTR \key + \branch - .
+ .long .Lstatic_jump_\@ - ., \target - .
+ _ASM_PTR \key + 1 - .
.popsection
.endm
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index 26942ad63830..488c59686a73 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -348,11 +348,23 @@ extern struct paravirt_patch_template pv_ops;
#define paravirt_clobber(clobber) \
[paravirt_clobber] "i" (clobber)
+/*
+ * Generate some code, and mark it as patchable by the
+ * apply_paravirt() alternate instruction patcher.
+ */
+#define _paravirt_alt(insn_string, type, clobber) \
+ "771:\n\t" insn_string "\n" "772:\n" \
+ ".pushsection .parainstructions,\"a\"\n" \
+ _ASM_ALIGN "\n" \
+ _ASM_PTR " 771b\n" \
+ " .byte " type "\n" \
+ " .byte 772b-771b\n" \
+ " .short " clobber "\n" \
+ ".popsection\n"
+
/* Generate patchable code, with the default asm parameters. */
-#define paravirt_call \
- "PARAVIRT_CALL type=\"%c[paravirt_typenum]\"" \
- " clobber=\"%c[paravirt_clobber]\"" \
- " pv_opptr=\"%c[paravirt_opptr]\";"
+#define paravirt_alt(insn_string) \
+ _paravirt_alt(insn_string, "%c[paravirt_typenum]", "%c[paravirt_clobber]")
/* Simple instruction patching code. */
#define NATIVE_LABEL(a,x,b) "\n\t.globl " a #x "_" #b "\n" a #x "_" #b ":\n\t"
@@ -372,6 +384,16 @@ unsigned native_patch(u8 type, void *ibuf, unsigned long addr, unsigned len);
int paravirt_disable_iospace(void);
+/*
+ * This generates an indirect call based on the operation type number.
+ * The type number, computed in PARAVIRT_PATCH, is derived from the
+ * offset into the paravirt_patch_template structure, and can therefore be
+ * freely converted back into a structure offset.
+ */
+#define PARAVIRT_CALL \
+ ANNOTATE_RETPOLINE_SAFE \
+ "call *%c[paravirt_opptr];"
+
/*
* These macros are intended to wrap calls through one of the paravirt
* ops structs, so that they can be later identified and patched at
@@ -509,7 +531,7 @@ int paravirt_disable_iospace(void);
/* since this condition will never hold */ \
if (sizeof(rettype) > sizeof(unsigned long)) { \
asm volatile(pre \
- paravirt_call \
+ paravirt_alt(PARAVIRT_CALL) \
post \
: call_clbr, ASM_CALL_CONSTRAINT \
: paravirt_type(op), \
@@ -519,7 +541,7 @@ int paravirt_disable_iospace(void);
__ret = (rettype)((((u64)__edx) << 32) | __eax); \
} else { \
asm volatile(pre \
- paravirt_call \
+ paravirt_alt(PARAVIRT_CALL) \
post \
: call_clbr, ASM_CALL_CONSTRAINT \
: paravirt_type(op), \
@@ -546,7 +568,7 @@ int paravirt_disable_iospace(void);
PVOP_VCALL_ARGS; \
PVOP_TEST_NULL(op); \
asm volatile(pre \
- paravirt_call \
+ paravirt_alt(PARAVIRT_CALL) \
post \
: call_clbr, ASM_CALL_CONSTRAINT \
: paravirt_type(op), \
@@ -664,26 +686,6 @@ struct paravirt_patch_site {
extern struct paravirt_patch_site __parainstructions[],
__parainstructions_end[];
-#else /* __ASSEMBLY__ */
-
-/*
- * This generates an indirect call based on the operation type number.
- * The type number, computed in PARAVIRT_PATCH, is derived from the
- * offset into the paravirt_patch_template structure, and can therefore be
- * freely converted back into a structure offset.
- */
-.macro PARAVIRT_CALL type:req clobber:req pv_opptr:req
-771: ANNOTATE_RETPOLINE_SAFE
- call *\pv_opptr
-772: .pushsection .parainstructions,"a"
- _ASM_ALIGN
- _ASM_PTR 771b
- .byte \type
- .byte 772b-771b
- .short \clobber
- .popsection
-.endm
-
#endif /* __ASSEMBLY__ */
#endif /* _ASM_X86_PARAVIRT_TYPES_H */
diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h
index 84bd9bdc1987..88bca456da99 100644
--- a/arch/x86/include/asm/pgtable_64_types.h
+++ b/arch/x86/include/asm/pgtable_64_types.h
@@ -111,6 +111,11 @@ extern unsigned int ptrs_per_p4d;
*/
#define MAXMEM (1UL << MAX_PHYSMEM_BITS)
+#define GUARD_HOLE_PGD_ENTRY -256UL
+#define GUARD_HOLE_SIZE (16UL << PGDIR_SHIFT)
+#define GUARD_HOLE_BASE_ADDR (GUARD_HOLE_PGD_ENTRY << PGDIR_SHIFT)
+#define GUARD_HOLE_END_ADDR (GUARD_HOLE_BASE_ADDR + GUARD_HOLE_SIZE)
+
#define LDT_PGD_ENTRY -240UL
#define LDT_BASE_ADDR (LDT_PGD_ENTRY << PGDIR_SHIFT)
#define LDT_END_ADDR (LDT_BASE_ADDR + PGDIR_SIZE)
diff --git a/arch/x86/include/asm/refcount.h b/arch/x86/include/asm/refcount.h
index a8b5e1e13319..dbaed55c1c24 100644
--- a/arch/x86/include/asm/refcount.h
+++ b/arch/x86/include/asm/refcount.h
@@ -4,41 +4,6 @@
* x86-specific implementation of refcount_t. Based on PAX_REFCOUNT from
* PaX/grsecurity.
*/
-
-#ifdef __ASSEMBLY__
-
-#include <asm/asm.h>
-#include <asm/bug.h>
-
-.macro REFCOUNT_EXCEPTION counter:req
- .pushsection .text..refcount
-111: lea \counter, %_ASM_CX
-112: ud2
- ASM_UNREACHABLE
- .popsection
-113: _ASM_EXTABLE_REFCOUNT(112b, 113b)
-.endm
-
-/* Trigger refcount exception if refcount result is negative. */
-.macro REFCOUNT_CHECK_LT_ZERO counter:req
- js 111f
- REFCOUNT_EXCEPTION counter="\counter"
-.endm
-
-/* Trigger refcount exception if refcount result is zero or negative. */
-.macro REFCOUNT_CHECK_LE_ZERO counter:req
- jz 111f
- REFCOUNT_CHECK_LT_ZERO counter="\counter"
-.endm
-
-/* Trigger refcount exception unconditionally. */
-.macro REFCOUNT_ERROR counter:req
- jmp 111f
- REFCOUNT_EXCEPTION counter="\counter"
-.endm
-
-#else /* __ASSEMBLY__ */
-
#include <linux/refcount.h>
#include <asm/bug.h>
@@ -50,12 +15,35 @@
* central refcount exception. The fixup address for the exception points
* back to the regular execution flow in .text.
*/
+#define _REFCOUNT_EXCEPTION \
+ ".pushsection .text..refcount\n" \
+ "111:\tlea %[var], %%" _ASM_CX "\n" \
+ "112:\t" ASM_UD2 "\n" \
+ ASM_UNREACHABLE \
+ ".popsection\n" \
+ "113:\n" \
+ _ASM_EXTABLE_REFCOUNT(112b, 113b)
+
+/* Trigger refcount exception if refcount result is negative. */
+#define REFCOUNT_CHECK_LT_ZERO \
+ "js 111f\n\t" \
+ _REFCOUNT_EXCEPTION
+
+/* Trigger refcount exception if refcount result is zero or negative. */
+#define REFCOUNT_CHECK_LE_ZERO \
+ "jz 111f\n\t" \
+ REFCOUNT_CHECK_LT_ZERO
+
+/* Trigger refcount exception unconditionally. */
+#define REFCOUNT_ERROR \
+ "jmp 111f\n\t" \
+ _REFCOUNT_EXCEPTION
static __always_inline void refcount_add(unsigned int i, refcount_t *r)
{
asm volatile(LOCK_PREFIX "addl %1,%0\n\t"
- "REFCOUNT_CHECK_LT_ZERO counter=\"%[counter]\""
- : [counter] "+m" (r->refs.counter)
+ REFCOUNT_CHECK_LT_ZERO
+ : [var] "+m" (r->refs.counter)
: "ir" (i)
: "cc", "cx");
}
@@ -63,32 +51,31 @@ static __always_inline void refcount_add(unsigned int i, refcount_t *r)
static __always_inline void refcount_inc(refcount_t *r)
{
asm volatile(LOCK_PREFIX "incl %0\n\t"
- "REFCOUNT_CHECK_LT_ZERO counter=\"%[counter]\""
- : [counter] "+m" (r->refs.counter)
+ REFCOUNT_CHECK_LT_ZERO
+ : [var] "+m" (r->refs.counter)
: : "cc", "cx");
}
static __always_inline void refcount_dec(refcount_t *r)
{
asm volatile(LOCK_PREFIX "decl %0\n\t"
- "REFCOUNT_CHECK_LE_ZERO counter=\"%[counter]\""
- : [counter] "+m" (r->refs.counter)
+ REFCOUNT_CHECK_LE_ZERO
+ : [var] "+m" (r->refs.counter)
: : "cc", "cx");
}
static __always_inline __must_check
bool refcount_sub_and_test(unsigned int i, refcount_t *r)
{
-
return GEN_BINARY_SUFFIXED_RMWcc(LOCK_PREFIX "subl",
- "REFCOUNT_CHECK_LT_ZERO counter=\"%[var]\"",
+ REFCOUNT_CHECK_LT_ZERO,
r->refs.counter, e, "er", i, "cx");
}
static __always_inline __must_check bool refcount_dec_and_test(refcount_t *r)
{
return GEN_UNARY_SUFFIXED_RMWcc(LOCK_PREFIX "decl",
- "REFCOUNT_CHECK_LT_ZERO counter=\"%[var]\"",
+ REFCOUNT_CHECK_LT_ZERO,
r->refs.counter, e, "cx");
}
@@ -106,8 +93,8 @@ bool refcount_add_not_zero(unsigned int i, refcount_t *r)
/* Did we try to increment from/to an undesirable state? */
if (unlikely(c < 0 || c == INT_MAX || result < c)) {
- asm volatile("REFCOUNT_ERROR counter=\"%[counter]\""
- : : [counter] "m" (r->refs.counter)
+ asm volatile(REFCOUNT_ERROR
+ : : [var] "m" (r->refs.counter)
: "cc", "cx");
break;
}
@@ -122,6 +109,4 @@ static __always_inline __must_check bool refcount_inc_not_zero(refcount_t *r)
return refcount_add_not_zero(1, r);
}
-#endif /* __ASSEMBLY__ */
-
#endif
diff --git a/arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c b/arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c
index 27937458c231..efa4a519f5e5 100644
--- a/arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c
+++ b/arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c
@@ -23,6 +23,7 @@
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/cpu.h>
#include <linux/kernfs.h>
#include <linux/seq_file.h>
#include <linux/slab.h>
@@ -310,9 +311,11 @@ ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of,
return -EINVAL;
buf[nbytes - 1] = '\0';
+ cpus_read_lock();
rdtgrp = rdtgroup_kn_lock_live(of->kn);
if (!rdtgrp) {
rdtgroup_kn_unlock(of->kn);
+ cpus_read_unlock();
return -ENOENT;
}
rdt_last_cmd_clear();
@@ -367,6 +370,7 @@ ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of,
out:
rdtgroup_kn_unlock(of->kn);
+ cpus_read_unlock();
return ret ?: nbytes;
}
diff --git a/arch/x86/kernel/cpu/mtrr/if.c b/arch/x86/kernel/cpu/mtrr/if.c
index 2e173d47b450..4d36dcc1cf87 100644
--- a/arch/x86/kernel/cpu/mtrr/if.c
+++ b/arch/x86/kernel/cpu/mtrr/if.c
@@ -165,6 +165,8 @@ mtrr_ioctl(struct file *file, unsigned int cmd, unsigned long __arg)
struct mtrr_gentry gentry;
void __user *arg = (void __user *) __arg;
+ memset(&gentry, 0, sizeof(gentry));
+
switch (cmd) {
case MTRRIOC_ADD_ENTRY:
case MTRRIOC_SET_ENTRY:
diff --git a/arch/x86/kernel/macros.S b/arch/x86/kernel/macros.S
deleted file mode 100644
index 161c95059044..000000000000
--- a/arch/x86/kernel/macros.S
+++ /dev/null
@@ -1,16 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-
-/*
- * This file includes headers whose assembly part includes macros which are
- * commonly used. The macros are precompiled into assmebly file which is later
- * assembled together with each compiled file.
- */
-
-#include <linux/compiler.h>
-#include <asm/refcount.h>
-#include <asm/alternative-asm.h>
-#include <asm/bug.h>
-#include <asm/paravirt.h>
-#include <asm/asm.h>
-#include <asm/cpufeature.h>
-#include <asm/jump_label.h>
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index bbfbf017065c..ddd4fa718c43 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -339,24 +339,6 @@ static unsigned long x86_fsgsbase_read_task(struct task_struct *task,
return base;
}
-void x86_fsbase_write_cpu(unsigned long fsbase)
-{
- /*
- * Set the selector to 0 as a notion, that the segment base is
- * overwritten, which will be checked for skipping the segment load
- * during context switch.
- */
- loadseg(FS, 0);
- wrmsrl(MSR_FS_BASE, fsbase);
-}
-
-void x86_gsbase_write_cpu_inactive(unsigned long gsbase)
-{
- /* Set the selector to 0 for the same reason as %fs above. */
- loadseg(GS, 0);
- wrmsrl(MSR_KERNEL_GS_BASE, gsbase);
-}
-
unsigned long x86_fsbase_read_task(struct task_struct *task)
{
unsigned long fsbase;
@@ -385,38 +367,18 @@ unsigned long x86_gsbase_read_task(struct task_struct *task)
return gsbase;
}
-int x86_fsbase_write_task(struct task_struct *task, unsigned long fsbase)
+void x86_fsbase_write_task(struct task_struct *task, unsigned long fsbase)
{
- /*
- * Not strictly needed for %fs, but do it for symmetry
- * with %gs
- */
- if (unlikely(fsbase >= TASK_SIZE_MAX))
- return -EPERM;
+ WARN_ON_ONCE(task == current);
- preempt_disable();
task->thread.fsbase = fsbase;
- if (task == current)
- x86_fsbase_write_cpu(fsbase);
- task->thread.fsindex = 0;
- preempt_enable();
-
- return 0;
}
-int x86_gsbase_write_task(struct task_struct *task, unsigned long gsbase)
+void x86_gsbase_write_task(struct task_struct *task, unsigned long gsbase)
{
- if (unlikely(gsbase >= TASK_SIZE_MAX))
- return -EPERM;
+ WARN_ON_ONCE(task == current);
- preempt_disable();
task->thread.gsbase = gsbase;
- if (task == current)
- x86_gsbase_write_cpu_inactive(gsbase);
- task->thread.gsindex = 0;
- preempt_enable();
-
- return 0;
}
int copy_thread_tls(unsigned long clone_flags, unsigned long sp,
@@ -754,11 +716,60 @@ long do_arch_prctl_64(struct task_struct *task, int option, unsigned long arg2)
switch (option) {
case ARCH_SET_GS: {
- ret = x86_gsbase_write_task(task, arg2);
+ if (unlikely(arg2 >= TASK_SIZE_MAX))
+ return -EPERM;
+
+ preempt_disable();
+ /*
+ * ARCH_SET_GS has always overwritten the index
+ * and the base. Zero is the most sensible value
+ * to put in the index, and is the only value that
+ * makes any sense if FSGSBASE is unavailable.
+ */
+ if (task == current) {
+ loadseg(GS, 0);
+ x86_gsbase_write_cpu_inactive(arg2);
+
+ /*
+ * On non-FSGSBASE systems, save_base_legacy() expects
+ * that we also fill in thread.gsbase.
+ */
+ task->thread.gsbase = arg2;
+
+ } else {
+ task->thread.gsindex = 0;
+ x86_gsbase_write_task(task, arg2);
+ }
+ preempt_enable();
break;
}
case ARCH_SET_FS: {
- ret = x86_fsbase_write_task(task, arg2);
+ /*
+ * Not strictly needed for %fs, but do it for symmetry
+ * with %gs
+ */
+ if (unlikely(arg2 >= TASK_SIZE_MAX))
+ return -EPERM;
+
+ preempt_disable();
+ /*
+ * Set the selector to 0 for the same reason
+ * as %gs above.
+ */
+ if (task == current) {
+ loadseg(FS, 0);
+ x86_fsbase_write_cpu(arg2);
+
+ /*
+ * On non-FSGSBASE systems, save_base_legacy() expects
+ * that we also fill in thread.fsbase.
+ */
+ task->thread.fsbase = arg2;
+ } else {
+ task->thread.fsindex = 0;
+ x86_fsbase_write_task(task, arg2);
+ }
+ preempt_enable();
break;
}
case ARCH_GET_FS: {
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index ffae9b9740fd..4b8ee05dd6ad 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -397,11 +397,12 @@ static int putreg(struct task_struct *child,
if (value >= TASK_SIZE_MAX)
return -EIO;
/*
- * When changing the FS base, use the same
- * mechanism as for do_arch_prctl_64().
+ * When changing the FS base, use do_arch_prctl_64()
+ * to set the index to zero and to set the base
+ * as requested.
*/
if (child->thread.fsbase != value)
- return x86_fsbase_write_task(child, value);
+ return do_arch_prctl_64(child, ARCH_SET_FS, value);
return 0;
case offsetof(struct user_regs_struct,gs_base):
/*
@@ -410,7 +411,7 @@ static int putreg(struct task_struct *child,
if (value >= TASK_SIZE_MAX)
return -EIO;
if (child->thread.gsbase != value)
- return x86_gsbase_write_task(child, value);
+ return do_arch_prctl_64(child, ARCH_SET_GS, value);
return 0;
#endif
}
diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c
index fc37bbd23eb8..abcb8d00b014 100644
--- a/arch/x86/mm/dump_pagetables.c
+++ b/arch/x86/mm/dump_pagetables.c
@@ -55,10 +55,10 @@ struct addr_marker {
enum address_markers_idx {
USER_SPACE_NR = 0,
KERNEL_SPACE_NR,
- LOW_KERNEL_NR,
-#if defined(CONFIG_MODIFY_LDT_SYSCALL) && defined(CONFIG_X86_5LEVEL)
+#ifdef CONFIG_MODIFY_LDT_SYSCALL
LDT_NR,
#endif
+ LOW_KERNEL_NR,
VMALLOC_START_NR,
VMEMMAP_START_NR,
#ifdef CONFIG_KASAN
@@ -66,9 +66,6 @@ enum address_markers_idx {
KASAN_SHADOW_END_NR,
#endif
CPU_ENTRY_AREA_NR,
-#if defined(CONFIG_MODIFY_LDT_SYSCALL) && !defined(CONFIG_X86_5LEVEL)
- LDT_NR,
-#endif
#ifdef CONFIG_X86_ESPFIX64
ESPFIX_START_NR,
#endif
@@ -512,11 +509,11 @@ static inline bool is_hypervisor_range(int idx)
{
#ifdef CONFIG_X86_64
/*
- * ffff800000000000 - ffff87ffffffffff is reserved for
- * the hypervisor.
+ * A hole in the beginning of kernel address space reserved
+ * for a hypervisor.
*/
- return (idx >= pgd_index(__PAGE_OFFSET) - 16) &&
- (idx < pgd_index(__PAGE_OFFSET));
+ return (idx >= pgd_index(GUARD_HOLE_BASE_ADDR)) &&
+ (idx < pgd_index(GUARD_HOLE_END_ADDR));
#else
return false;
#endif
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index db7a10082238..a1bcde35db4c 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -285,20 +285,16 @@ static void cpa_flush_all(unsigned long cache)
on_each_cpu(__cpa_flush_all, (void *) cache, 1);
}
-static bool __cpa_flush_range(unsigned long start, int numpages, int cache)
+static bool __inv_flush_all(int cache)
{
BUG_ON(irqs_disabled() && !early_boot_irqs_disabled);
- WARN_ON(PAGE_ALIGN(start) != start);
-
if (cache && !static_cpu_has(X86_FEATURE_CLFLUSH)) {
cpa_flush_all(cache);
return true;
}
- flush_tlb_kernel_range(start, start + PAGE_SIZE * numpages);
-
- return !cache;
+ return false;
}
static void cpa_flush_range(unsigned long start, int numpages, int cache)
@@ -306,7 +302,14 @@ static void cpa_flush_range(unsigned long start, int numpages, int cache)
unsigned int i, level;
unsigned long addr;
- if (__cpa_flush_range(start, numpages, cache))
+ WARN_ON(PAGE_ALIGN(start) != start);
+
+ if (__inv_flush_all(cache))
+ return;
+
+ flush_tlb_kernel_range(start, start + PAGE_SIZE * numpages);
+
+ if (!cache)
return;
/*
@@ -332,7 +335,12 @@ static void cpa_flush_array(unsigned long baddr, unsigned long *start,
{
unsigned int i, level;
- if (__cpa_flush_range(baddr, numpages, cache))
+ if (__inv_flush_all(cache))
+ return;
+
+ flush_tlb_all();
+
+ if (!cache)
return;
/*
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index 08013524fba1..4fe956a63b25 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -519,8 +519,13 @@ static u64 sanitize_phys(u64 address)
* for a "decoy" virtual address (bit 63 clear) passed to
* set_memory_X(). __pa() on a "decoy" address results in a
* physical address with bit 63 set.
+ *
+ * Decoy addresses are not present for 32-bit builds, see
+ * set_mce_nospec().
*/
- return address & __PHYSICAL_MASK;
+ if (IS_ENABLED(CONFIG_X86_64))
+ return address & __PHYSICAL_MASK;
+ return address;
}
/*
@@ -546,7 +551,11 @@ int reserve_memtype(u64 start, u64 end, enum page_cache_mode req_type,
start = sanitize_phys(start);
end = sanitize_phys(end);
- BUG_ON(start >= end); /* end is exclusive */
+ if (start >= end) {
+ WARN(1, "%s failed: [mem %#010Lx-%#010Lx], req %s\n", __func__,
+ start, end - 1, cattr_name(req_type));
+ return -EINVAL;
+ }
if (!pat_enabled()) {
/* This is identical to page table setting without PAT */
diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c
index a5d7ed125337..0f4fe206dcc2 100644
--- a/arch/x86/xen/mmu_pv.c
+++ b/arch/x86/xen/mmu_pv.c
@@ -648,19 +648,20 @@ static int __xen_pgd_walk(struct mm_struct *mm, pgd_t *pgd,
unsigned long limit)
{
int i, nr, flush = 0;
- unsigned hole_low, hole_high;
+ unsigned hole_low = 0, hole_high = 0;
/* The limit is the last byte to be touched */
limit--;
BUG_ON(limit >= FIXADDR_TOP);
+#ifdef CONFIG_X86_64
/*
* 64-bit has a great big hole in the middle of the address
- * space, which contains the Xen mappings. On 32-bit these
- * will end up making a zero-sized hole and so is a no-op.
+ * space, which contains the Xen mappings.
*/
- hole_low = pgd_index(USER_LIMIT);
- hole_high = pgd_index(PAGE_OFFSET);
+ hole_low = pgd_index(GUARD_HOLE_BASE_ADDR);
+ hole_high = pgd_index(GUARD_HOLE_END_ADDR);
+#endif
nr = pgd_index(limit) + 1;
for (i = 0; i < nr; i++) {
diff --git a/include/asm-generic/bug.h b/include/asm-generic/bug.h
index cdafa5edea49..20561a60db9c 100644
--- a/include/asm-generic/bug.h
+++ b/include/asm-generic/bug.h
@@ -17,8 +17,10 @@
#ifndef __ASSEMBLY__
#include <linux/kernel.h>
-struct bug_entry {
+#ifdef CONFIG_BUG
+
#ifdef CONFIG_GENERIC_BUG
+struct bug_entry {
#ifndef CONFIG_GENERIC_BUG_RELATIVE_POINTERS
unsigned long bug_addr;
#else
@@ -33,10 +35,8 @@ struct bug_entry {
unsigned short line;
#endif
unsigned short flags;
-#endif /* CONFIG_GENERIC_BUG */
};
-
-#ifdef CONFIG_BUG
+#endif /* CONFIG_GENERIC_BUG */
/*
* Don't use BUG() or BUG_ON() unless there's really no way out; one
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index 06396c1cf127..fc5004a4b07d 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -99,13 +99,22 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val,
* unique, to convince GCC not to merge duplicate inline asm statements.
*/
#define annotate_reachable() ({ \
- asm volatile("ANNOTATE_REACHABLE counter=%c0" \
- : : "i" (__COUNTER__)); \
+ asm volatile("%c0:\n\t" \
+ ".pushsection .discard.reachable\n\t" \
+ ".long %c0b - .\n\t" \
+ ".popsection\n\t" : : "i" (__COUNTER__)); \
})
#define annotate_unreachable() ({ \
- asm volatile("ANNOTATE_UNREACHABLE counter=%c0" \
- : : "i" (__COUNTER__)); \
+ asm volatile("%c0:\n\t" \
+ ".pushsection .discard.unreachable\n\t" \
+ ".long %c0b - .\n\t" \
+ ".popsection\n\t" : : "i" (__COUNTER__)); \
})
+#define ASM_UNREACHABLE \
+ "999:\n\t" \
+ ".pushsection .discard.unreachable\n\t" \
+ ".long 999b - .\n\t" \
+ ".popsection\n\t"
#else
#define annotate_reachable()
#define annotate_unreachable()
@@ -293,45 +302,6 @@ static inline void *offset_to_ptr(const int *off)
return (void *)((unsigned long)off + *off);
}
-#else /* __ASSEMBLY__ */
-
-#ifdef __KERNEL__
-#ifndef LINKER_SCRIPT
-
-#ifdef CONFIG_STACK_VALIDATION
-.macro ANNOTATE_UNREACHABLE counter:req
-\counter:
- .pushsection .discard.unreachable
- .long \counter\()b -.
- .popsection
-.endm
-
-.macro ANNOTATE_REACHABLE counter:req
-\counter:
- .pushsection .discard.reachable
- .long \counter\()b -.
- .popsection
-.endm
-
-.macro ASM_UNREACHABLE
-999:
- .pushsection .discard.unreachable
- .long 999b - .
- .popsection
-.endm
-#else /* CONFIG_STACK_VALIDATION */
-.macro ANNOTATE_UNREACHABLE counter:req
-.endm
-
-.macro ANNOTATE_REACHABLE counter:req
-.endm
-
-.macro ASM_UNREACHABLE
-.endm
-#endif /* CONFIG_STACK_VALIDATION */
-
-#endif /* LINKER_SCRIPT */
-#endif /* __KERNEL__ */
#endif /* __ASSEMBLY__ */
/* Compile time object size, -1 for unknown */
diff --git a/scripts/Kbuild.include b/scripts/Kbuild.include
index bb015551c2d9..3d09844405c9 100644
--- a/scripts/Kbuild.include
+++ b/scripts/Kbuild.include
@@ -115,9 +115,7 @@ __cc-option = $(call try-run,\
# Do not attempt to build with gcc plugins during cc-option tests.
# (And this uses delayed resolution so the flags will be up to date.)
-# In addition, do not include the asm macros which are built later.
-CC_OPTION_FILTERED = $(GCC_PLUGINS_CFLAGS) $(ASM_MACRO_FLAGS)
-CC_OPTION_CFLAGS = $(filter-out $(CC_OPTION_FILTERED),$(KBUILD_CFLAGS))
+CC_OPTION_CFLAGS = $(filter-out $(GCC_PLUGINS_CFLAGS),$(KBUILD_CFLAGS))
# cc-option
# Usage: cflags-y += $(call cc-option,-march=winchip-c6,-march=i586)
diff --git a/scripts/mod/Makefile b/scripts/mod/Makefile
index a5b4af47987a..42c5d50f2bcc 100644
--- a/scripts/mod/Makefile
+++ b/scripts/mod/Makefile
@@ -4,8 +4,6 @@ OBJECT_FILES_NON_STANDARD := y
hostprogs-y := modpost mk_elfconfig
always := $(hostprogs-y) empty.o
-CFLAGS_REMOVE_empty.o := $(ASM_MACRO_FLAGS)
-
modpost-objs := modpost.o file2alias.o sumversion.o
devicetable-offsets-file := devicetable-offsets.h
^ permalink raw reply related [flat|nested] 547+ messages in thread
* Re: [GIT PULL] x86 fixes
2018-12-09 22:06 Ingo Molnar
@ 2018-12-09 23:45 ` pr-tracker-bot
0 siblings, 0 replies; 547+ messages in thread
From: pr-tracker-bot @ 2018-12-09 23:45 UTC (permalink / raw)
To: Ingo Molnar
Cc: Linus Torvalds, linux-kernel, Thomas Gleixner, Borislav Petkov,
Peter Zijlstra, Andrew Morton
The pull request you sent on Sun, 9 Dec 2018 23:06:55 +0100:
> git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus
has been merged into torvalds/linux.git:
https://git.kernel.org/torvalds/c/8586ca8a214471e4573d76356aabe890bfecdc8a
Thank you!
--
Deet-doot-dot, I am a bot.
https://korg.wiki.kernel.org/userdoc/prtracker
^ permalink raw reply [flat|nested] 547+ messages in thread
* [GIT PULL] x86 fixes
@ 2018-12-09 22:06 Ingo Molnar
2018-12-09 23:45 ` pr-tracker-bot
0 siblings, 1 reply; 547+ messages in thread
From: Ingo Molnar @ 2018-12-09 22:06 UTC (permalink / raw)
To: Linus Torvalds
Cc: linux-kernel, Thomas Gleixner, Borislav Petkov, Peter Zijlstra,
Andrew Morton
Linus,
Please pull the latest x86-urgent-for-linus git tree from:
git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus
# HEAD: ac3e233d29f7f77f28243af0132057d378d3ea58 x86/vdso: Drop implicit common-page-size linker flag
Three fixes: a boot parameter re-(re-)fix, a retpoline build artifact fix
and an LLVM workaround.
Thanks,
Ingo
------------------>
Juergen Gross (1):
x86/boot: Clear RSDP address in boot_params for broken loaders
Masahiro Yamada (1):
x86/build: Fix compiler support check for CONFIG_RETPOLINE
Nick Desaulniers (1):
x86/vdso: Drop implicit common-page-size linker flag
arch/x86/Makefile | 10 +++++++---
arch/x86/entry/vdso/Makefile | 4 ++--
arch/x86/include/asm/bootparam_utils.h | 1 +
3 files changed, 10 insertions(+), 5 deletions(-)
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index f5d7f4134524..75ef499a66e2 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -220,9 +220,6 @@ KBUILD_CFLAGS += -fno-asynchronous-unwind-tables
# Avoid indirect branches in kernel to deal with Spectre
ifdef CONFIG_RETPOLINE
-ifeq ($(RETPOLINE_CFLAGS),)
- $(error You are building kernel with non-retpoline compiler, please update your compiler.)
-endif
KBUILD_CFLAGS += $(RETPOLINE_CFLAGS)
endif
@@ -307,6 +304,13 @@ ifndef CC_HAVE_ASM_GOTO
@echo Compiler lacks asm-goto support.
@exit 1
endif
+ifdef CONFIG_RETPOLINE
+ifeq ($(RETPOLINE_CFLAGS),)
+ @echo "You are building kernel with non-retpoline compiler." >&2
+ @echo "Please update your compiler." >&2
+ @false
+endif
+endif
archclean:
$(Q)rm -rf $(objtree)/arch/i386
diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile
index 141d415a8c80..0624bf2266fd 100644
--- a/arch/x86/entry/vdso/Makefile
+++ b/arch/x86/entry/vdso/Makefile
@@ -47,7 +47,7 @@ targets += $(vdso_img_sodbg) $(vdso_img-y:%=vdso%.so)
CPPFLAGS_vdso.lds += -P -C
VDSO_LDFLAGS_vdso.lds = -m elf_x86_64 -soname linux-vdso.so.1 --no-undefined \
- -z max-page-size=4096 -z common-page-size=4096
+ -z max-page-size=4096
$(obj)/vdso64.so.dbg: $(obj)/vdso.lds $(vobjs) FORCE
$(call if_changed,vdso)
@@ -98,7 +98,7 @@ CFLAGS_REMOVE_vvar.o = -pg
CPPFLAGS_vdsox32.lds = $(CPPFLAGS_vdso.lds)
VDSO_LDFLAGS_vdsox32.lds = -m elf32_x86_64 -soname linux-vdso.so.1 \
- -z max-page-size=4096 -z common-page-size=4096
+ -z max-page-size=4096
# x32-rebranded versions
vobjx32s-y := $(vobjs-y:.o=-x32.o)
diff --git a/arch/x86/include/asm/bootparam_utils.h b/arch/x86/include/asm/bootparam_utils.h
index a07ffd23e4dd..f6f6ef436599 100644
--- a/arch/x86/include/asm/bootparam_utils.h
+++ b/arch/x86/include/asm/bootparam_utils.h
@@ -36,6 +36,7 @@ static void sanitize_boot_params(struct boot_params *boot_params)
*/
if (boot_params->sentinel) {
/* fields in boot_params are left uninitialized, clear them */
+ boot_params->acpi_rsdp_addr = 0;
memset(&boot_params->ext_ramdisk_image, 0,
(char *)&boot_params->efi_info -
(char *)&boot_params->ext_ramdisk_image);
^ permalink raw reply related [flat|nested] 547+ messages in thread
* Re: [GIT PULL] x86 fixes
2018-11-30 6:29 Ingo Molnar
@ 2018-11-30 21:00 ` pr-tracker-bot
0 siblings, 0 replies; 547+ messages in thread
From: pr-tracker-bot @ 2018-11-30 21:00 UTC (permalink / raw)
To: Ingo Molnar
Cc: Linus Torvalds, linux-kernel, Thomas Gleixner, Borislav Petkov,
Peter Zijlstra, Andrew Morton
The pull request you sent on Fri, 30 Nov 2018 07:29:06 +0100:
> git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus
has been merged into torvalds/linux.git:
https://git.kernel.org/torvalds/c/1ec63573b2db363848abb313cc75eb29e9abc1b3
Thank you!
--
Deet-doot-dot, I am a bot.
https://korg.wiki.kernel.org/userdoc/prtracker
^ permalink raw reply [flat|nested] 547+ messages in thread
* [GIT PULL] x86 fixes
@ 2018-11-30 6:29 Ingo Molnar
2018-11-30 21:00 ` pr-tracker-bot
0 siblings, 1 reply; 547+ messages in thread
From: Ingo Molnar @ 2018-11-30 6:29 UTC (permalink / raw)
To: Linus Torvalds
Cc: linux-kernel, Thomas Gleixner, Borislav Petkov, Peter Zijlstra,
Andrew Morton
Linus,
Please pull the latest x86-urgent-for-linus git tree from:
git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus
# HEAD: 60c8144afc287ef09ce8c1230c6aa972659ba1bb x86/MCE/AMD: Fix the thresholding machinery initialization order
Misc fixes:
- an MCE related boot crash fix on certain AMD systems
- an FPU exception handling fix
- an FPU handling race fix
- a revert+rewrite of the RSDP boot protocol extension, use boot_params
instead
- a documentation fix
Thanks,
Ingo
------------------>
Borislav Petkov (1):
x86/MCE/AMD: Fix the thresholding machinery initialization order
Elvira Khabirova (1):
x86/ptrace: Fix documentation for tracehook_report_syscall_entry()
Jann Horn (1):
x86/fpu: Use the correct exception table macro in the XSTATE_OP wrapper
Juergen Gross (2):
x86/boot: Mostly revert commit ae7e1238e68f2a ("Add ACPI RSDP address to setup_header")
x86/acpi, x86/boot: Take RSDP address from boot params if available
Sebastian Andrzej Siewior (1):
x86/fpu: Disable bottom halves while loading FPU registers
Documentation/x86/boot.txt | 32 +-------------------------------
| 6 +-----
arch/x86/include/asm/fpu/internal.h | 2 +-
arch/x86/include/asm/x86_init.h | 2 --
arch/x86/include/uapi/asm/bootparam.h | 7 ++-----
arch/x86/kernel/acpi/boot.c | 2 +-
arch/x86/kernel/cpu/mcheck/mce_amd.c | 19 ++++++-------------
arch/x86/kernel/fpu/signal.c | 4 ++--
arch/x86/kernel/head32.c | 1 -
arch/x86/kernel/head64.c | 2 --
arch/x86/kernel/setup.c | 17 -----------------
include/linux/tracehook.h | 4 ++--
12 files changed, 16 insertions(+), 82 deletions(-)
diff --git a/Documentation/x86/boot.txt b/Documentation/x86/boot.txt
index 7727db8f94bc..5e9b826b5f62 100644
--- a/Documentation/x86/boot.txt
+++ b/Documentation/x86/boot.txt
@@ -61,18 +61,6 @@ Protocol 2.12: (Kernel 3.8) Added the xloadflags field and extension fields
to struct boot_params for loading bzImage and ramdisk
above 4G in 64bit.
-Protocol 2.13: (Kernel 3.14) Support 32- and 64-bit flags being set in
- xloadflags to support booting a 64-bit kernel from 32-bit
- EFI
-
-Protocol 2.14: (Kernel 4.20) Added acpi_rsdp_addr holding the physical
- address of the ACPI RSDP table.
- The bootloader updates version with:
- 0x8000 | min(kernel-version, bootloader-version)
- kernel-version being the protocol version supported by
- the kernel and bootloader-version the protocol version
- supported by the bootloader.
-
**** MEMORY LAYOUT
The traditional memory map for the kernel loader, used for Image or
@@ -209,7 +197,6 @@ Offset Proto Name Meaning
0258/8 2.10+ pref_address Preferred loading address
0260/4 2.10+ init_size Linear memory required during initialization
0264/4 2.11+ handover_offset Offset of handover entry point
-0268/8 2.14+ acpi_rsdp_addr Physical address of RSDP table
(1) For backwards compatibility, if the setup_sects field contains 0, the
real value is 4.
@@ -322,7 +309,7 @@ Protocol: 2.00+
Contains the magic number "HdrS" (0x53726448).
Field name: version
-Type: modify
+Type: read
Offset/size: 0x206/2
Protocol: 2.00+
@@ -330,12 +317,6 @@ Protocol: 2.00+
e.g. 0x0204 for version 2.04, and 0x0a11 for a hypothetical version
10.17.
- Up to protocol version 2.13 this information is only read by the
- bootloader. From protocol version 2.14 onwards the bootloader will
- write the used protocol version or-ed with 0x8000 to the field. The
- used protocol version will be the minimum of the supported protocol
- versions of the bootloader and the kernel.
-
Field name: realmode_swtch
Type: modify (optional)
Offset/size: 0x208/4
@@ -763,17 +744,6 @@ Offset/size: 0x264/4
See EFI HANDOVER PROTOCOL below for more details.
-Field name: acpi_rsdp_addr
-Type: write
-Offset/size: 0x268/8
-Protocol: 2.14+
-
- This field can be set by the boot loader to tell the kernel the
- physical address of the ACPI RSDP table.
-
- A value of 0 indicates the kernel should fall back to the standard
- methods to locate the RSDP.
-
**** THE IMAGE CHECKSUM
--git a/arch/x86/boot/header.S b/arch/x86/boot/header.S
index 4c881c850125..850b8762e889 100644
--- a/arch/x86/boot/header.S
+++ b/arch/x86/boot/header.S
@@ -300,7 +300,7 @@ _start:
# Part 2 of the header, from the old setup.S
.ascii "HdrS" # header signature
- .word 0x020e # header version number (>= 0x0105)
+ .word 0x020d # header version number (>= 0x0105)
# or else old loadlin-1.5 will fail)
.globl realmode_swtch
realmode_swtch: .word 0, 0 # default_switch, SETUPSEG
@@ -558,10 +558,6 @@ pref_address: .quad LOAD_PHYSICAL_ADDR # preferred load addr
init_size: .long INIT_SIZE # kernel initialization size
handover_offset: .long 0 # Filled in by build.c
-acpi_rsdp_addr: .quad 0 # 64-bit physical pointer to the
- # ACPI RSDP table, added with
- # version 2.14
-
# End of setup header #####################################################
.section ".entrytext", "ax"
diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h
index 5f7290e6e954..69dcdf195b61 100644
--- a/arch/x86/include/asm/fpu/internal.h
+++ b/arch/x86/include/asm/fpu/internal.h
@@ -226,7 +226,7 @@ static inline void copy_fxregs_to_kernel(struct fpu *fpu)
"3: movl $-2,%[err]\n\t" \
"jmp 2b\n\t" \
".popsection\n\t" \
- _ASM_EXTABLE_UA(1b, 3b) \
+ _ASM_EXTABLE(1b, 3b) \
: [err] "=r" (err) \
: "D" (st), "m" (*st), "a" (lmask), "d" (hmask) \
: "memory")
diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h
index 0f842104862c..b85a7c54c6a1 100644
--- a/arch/x86/include/asm/x86_init.h
+++ b/arch/x86/include/asm/x86_init.h
@@ -303,6 +303,4 @@ extern void x86_init_noop(void);
extern void x86_init_uint_noop(unsigned int unused);
extern bool x86_pnpbios_disabled(void);
-void x86_verify_bootdata_version(void);
-
#endif
diff --git a/arch/x86/include/uapi/asm/bootparam.h b/arch/x86/include/uapi/asm/bootparam.h
index 22f89d040ddd..60733f137e9a 100644
--- a/arch/x86/include/uapi/asm/bootparam.h
+++ b/arch/x86/include/uapi/asm/bootparam.h
@@ -16,9 +16,6 @@
#define RAMDISK_PROMPT_FLAG 0x8000
#define RAMDISK_LOAD_FLAG 0x4000
-/* version flags */
-#define VERSION_WRITTEN 0x8000
-
/* loadflags */
#define LOADED_HIGH (1<<0)
#define KASLR_FLAG (1<<1)
@@ -89,7 +86,6 @@ struct setup_header {
__u64 pref_address;
__u32 init_size;
__u32 handover_offset;
- __u64 acpi_rsdp_addr;
} __attribute__((packed));
struct sys_desc_table {
@@ -159,7 +155,8 @@ struct boot_params {
__u8 _pad2[4]; /* 0x054 */
__u64 tboot_addr; /* 0x058 */
struct ist_info ist_info; /* 0x060 */
- __u8 _pad3[16]; /* 0x070 */
+ __u64 acpi_rsdp_addr; /* 0x070 */
+ __u8 _pad3[8]; /* 0x078 */
__u8 hd0_info[16]; /* obsolete! */ /* 0x080 */
__u8 hd1_info[16]; /* obsolete! */ /* 0x090 */
struct sys_desc_table sys_desc_table; /* obsolete! */ /* 0x0a0 */
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 92c76bf97ad8..06635fbca81c 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -1776,5 +1776,5 @@ void __init arch_reserve_mem_area(acpi_physical_address addr, size_t size)
u64 x86_default_get_root_pointer(void)
{
- return boot_params.hdr.acpi_rsdp_addr;
+ return boot_params.acpi_rsdp_addr;
}
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index dd33c357548f..e12454e21b8a 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -56,7 +56,7 @@
/* Threshold LVT offset is at MSR0xC0000410[15:12] */
#define SMCA_THR_LVT_OFF 0xF000
-static bool thresholding_en;
+static bool thresholding_irq_en;
static const char * const th_names[] = {
"load_store",
@@ -534,9 +534,8 @@ prepare_threshold_block(unsigned int bank, unsigned int block, u32 addr,
set_offset:
offset = setup_APIC_mce_threshold(offset, new);
-
- if ((offset == new) && (mce_threshold_vector != amd_threshold_interrupt))
- mce_threshold_vector = amd_threshold_interrupt;
+ if (offset == new)
+ thresholding_irq_en = true;
done:
mce_threshold_block_init(&b, offset);
@@ -1357,9 +1356,6 @@ int mce_threshold_remove_device(unsigned int cpu)
{
unsigned int bank;
- if (!thresholding_en)
- return 0;
-
for (bank = 0; bank < mca_cfg.banks; ++bank) {
if (!(per_cpu(bank_map, cpu) & (1 << bank)))
continue;
@@ -1377,9 +1373,6 @@ int mce_threshold_create_device(unsigned int cpu)
struct threshold_bank **bp;
int err = 0;
- if (!thresholding_en)
- return 0;
-
bp = per_cpu(threshold_banks, cpu);
if (bp)
return 0;
@@ -1408,9 +1401,6 @@ static __init int threshold_init_device(void)
{
unsigned lcpu = 0;
- if (mce_threshold_vector == amd_threshold_interrupt)
- thresholding_en = true;
-
/* to hit CPUs online before the notifier is up */
for_each_online_cpu(lcpu) {
int err = mce_threshold_create_device(lcpu);
@@ -1419,6 +1409,9 @@ static __init int threshold_init_device(void)
return err;
}
+ if (thresholding_irq_en)
+ mce_threshold_vector = amd_threshold_interrupt;
+
return 0;
}
/*
diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c
index 61a949d84dfa..d99a8ee9e185 100644
--- a/arch/x86/kernel/fpu/signal.c
+++ b/arch/x86/kernel/fpu/signal.c
@@ -344,10 +344,10 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
sanitize_restored_xstate(tsk, &env, xfeatures, fx_only);
}
+ local_bh_disable();
fpu->initialized = 1;
- preempt_disable();
fpu__restore(fpu);
- preempt_enable();
+ local_bh_enable();
return err;
} else {
diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c
index 76fa3b836598..ec6fefbfd3c0 100644
--- a/arch/x86/kernel/head32.c
+++ b/arch/x86/kernel/head32.c
@@ -37,7 +37,6 @@ asmlinkage __visible void __init i386_start_kernel(void)
cr4_init_shadow();
sanitize_boot_params(&boot_params);
- x86_verify_bootdata_version();
x86_early_init_platform_quirks();
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 7663a8eb602b..16b1cbd3a61e 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -457,8 +457,6 @@ void __init x86_64_start_reservations(char *real_mode_data)
if (!boot_params.hdr.version)
copy_bootdata(__va(real_mode_data));
- x86_verify_bootdata_version();
-
x86_early_init_platform_quirks();
switch (boot_params.hdr.hardware_subarch) {
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index b74e7bfed6ab..d494b9bfe618 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -1280,23 +1280,6 @@ void __init setup_arch(char **cmdline_p)
unwind_init();
}
-/*
- * From boot protocol 2.14 onwards we expect the bootloader to set the
- * version to "0x8000 | <used version>". In case we find a version >= 2.14
- * without the 0x8000 we assume the boot loader supports 2.13 only and
- * reset the version accordingly. The 0x8000 flag is removed in any case.
- */
-void __init x86_verify_bootdata_version(void)
-{
- if (boot_params.hdr.version & VERSION_WRITTEN)
- boot_params.hdr.version &= ~VERSION_WRITTEN;
- else if (boot_params.hdr.version >= 0x020e)
- boot_params.hdr.version = 0x020d;
-
- if (boot_params.hdr.version < 0x020e)
- boot_params.hdr.acpi_rsdp_addr = 0;
-}
-
#ifdef CONFIG_X86_32
static struct resource video_ram_resource = {
diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h
index 40b0b4c1bf7b..df20f8bdbfa3 100644
--- a/include/linux/tracehook.h
+++ b/include/linux/tracehook.h
@@ -83,8 +83,8 @@ static inline int ptrace_report_syscall(struct pt_regs *regs)
* tracehook_report_syscall_entry - task is about to attempt a system call
* @regs: user register state of current task
*
- * This will be called if %TIF_SYSCALL_TRACE has been set, when the
- * current task has just entered the kernel for a system call.
+ * This will be called if %TIF_SYSCALL_TRACE or %TIF_SYSCALL_EMU have been set,
+ * when the current task has just entered the kernel for a system call.
* Full user register state is available here. Changing the values
* in @regs can affect the system call number and arguments to be tried.
* It is safe to block here, preventing the system call from beginning.
^ permalink raw reply related [flat|nested] 547+ messages in thread
* Re: [GIT PULL] x86 fixes
2018-11-03 23:09 Ingo Molnar
@ 2018-11-04 1:27 ` Linus Torvalds
0 siblings, 0 replies; 547+ messages in thread
From: Linus Torvalds @ 2018-11-04 1:27 UTC (permalink / raw)
To: Ingo Molnar
Cc: Linux Kernel Mailing List, tglx, bp, Peter Zijlstra, Andrew Morton
On Sat, Nov 3, 2018 at 4:09 PM Ingo Molnar <mingo@kernel.org> wrote:
>
> A number of fixes and some late updates:
Pulled,
Linus
^ permalink raw reply [flat|nested] 547+ messages in thread
* [GIT PULL] x86 fixes
@ 2018-11-03 23:09 Ingo Molnar
2018-11-04 1:27 ` Linus Torvalds
0 siblings, 1 reply; 547+ messages in thread
From: Ingo Molnar @ 2018-11-03 23:09 UTC (permalink / raw)
To: Linus Torvalds
Cc: linux-kernel, Thomas Gleixner, Borislav Petkov, Peter Zijlstra,
Andrew Morton
Linus,
Please pull the latest x86-urgent-for-linus git tree from:
git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus
# HEAD: 23a12ddee1ce28065b71f14ccc695b5a0c8a64ff Merge branch 'core/urgent' into x86/urgent, to pick up objtool fix
A number of fixes and some late updates:
- make in_compat_syscall() behavior on x86-32 similar to other
platforms, this touches a number of generic files but is not intended
to impact non-x86 platforms.
- objtool fixes
- PAT preemption fix
- paravirt fixes/cleanups
- cpufeatures updates for new instructions
- earlyprintk quirk
- make microcode version in sysfs world-readable (it is already world-readable in procfs)
- minor cleanups and fixes
out-of-topic modifications in x86-urgent-for-linus:
-----------------------------------------------------
drivers/firmware/efi/efivars.c # 98f76206b335: compat: Cleanup in_compat_sy
include/linux/compat.h # a846446b1914: x86/compat: Adjust in_compat
kernel/time/time.c # 98f76206b335: compat: Cleanup in_compat_sy
net/xfrm/xfrm_state.c # 98f76206b335: compat: Cleanup in_compat_sy
net/xfrm/xfrm_user.c # 98f76206b335: compat: Cleanup in_compat_sy
tools/objtool/check.c # 4a60aa05a063: objtool: Support per-functio
tools/objtool/check.h # 4a60aa05a063: objtool: Support per-functio
tools/objtool/elf.c # bcb6fb5da77c: objtool: Support GCC 9 cold
# 4a60aa05a063: objtool: Support per-functio
tools/objtool/elf.h # 4a60aa05a063: objtool: Support per-functio
Thanks,
Ingo
------------------>
Allan Xavier (1):
objtool: Support per-function rodata sections
Dave Jiang (1):
x86/numa_emulation: Fix uniform-split numa emulation
Dmitry Safonov (2):
x86/compat: Adjust in_compat_syscall() to generic code under !COMPAT
compat: Cleanup in_compat_syscall() callers
Feng Tang (1):
x86/earlyprintk: Add a force option for pciserial device
Fenghua Yu (2):
x86/cpufeatures: Enumerate MOVDIRI instruction
x86/cpufeatures: Enumerate MOVDIR64B instruction
Jacek Tomaka (1):
x86/microcode: Make revision and processor flags world-readable
Jordan Borgner (1):
x86: Clean up 'sizeof x' => 'sizeof(x)'
Josh Poimboeuf (1):
objtool: Support GCC 9 cold subfunction naming scheme
Juergen Gross (2):
x86/paravirt: Remove GPL from pv_ops export
x86/paravirt: Remove unused _paravirt_ident_32
Rasmus Villemoes (1):
x86/traps: Use format string with panic() call
Sebastian Andrzej Siewior (1):
x86/mm/pat: Disable preemption around __flush_tlb_all()
Documentation/admin-guide/kernel-parameters.txt | 6 +++-
arch/x86/boot/cpucheck.c | 2 +-
arch/x86/boot/early_serial_console.c | 4 +--
arch/x86/boot/edd.c | 6 ++--
arch/x86/boot/main.c | 4 +--
arch/x86/boot/memory.c | 2 +-
arch/x86/boot/regs.c | 2 +-
arch/x86/boot/video-vesa.c | 6 ++--
arch/x86/boot/video.c | 2 +-
arch/x86/events/intel/core.c | 2 +-
arch/x86/include/asm/compat.h | 9 +++++-
arch/x86/include/asm/cpufeatures.h | 2 ++
arch/x86/include/asm/ftrace.h | 4 +--
arch/x86/include/asm/paravirt_types.h | 2 --
arch/x86/include/asm/tlbflush.h | 6 ++++
arch/x86/kernel/cpu/common.c | 4 +--
arch/x86/kernel/cpu/mcheck/mce.c | 2 +-
arch/x86/kernel/cpu/microcode/core.c | 4 +--
arch/x86/kernel/cpu/mtrr/generic.c | 2 +-
arch/x86/kernel/cpu/mtrr/if.c | 6 ++--
arch/x86/kernel/early_printk.c | 29 +++++++++++------
arch/x86/kernel/head64.c | 2 +-
arch/x86/kernel/msr.c | 8 ++---
arch/x86/kernel/paravirt.c | 28 +++++------------
arch/x86/kernel/paravirt_patch_32.c | 18 ++++-------
arch/x86/kernel/paravirt_patch_64.c | 20 ++++--------
arch/x86/kernel/process_64.c | 4 +--
arch/x86/kernel/sys_x86_64.c | 11 ++++---
arch/x86/kernel/traps.c | 2 +-
arch/x86/kvm/emulate.c | 22 ++++++-------
arch/x86/kvm/lapic.c | 2 +-
arch/x86/kvm/x86.c | 42 ++++++++++++-------------
arch/x86/mm/hugetlbpage.c | 4 +--
arch/x86/mm/mmap.c | 2 +-
arch/x86/mm/numa_emulation.c | 12 +++++--
arch/x86/mm/pageattr.c | 6 +++-
arch/x86/tools/relocs.c | 4 +--
arch/x86/um/asm/elf.h | 2 +-
drivers/firmware/efi/efivars.c | 16 +++-------
include/linux/compat.h | 4 +--
kernel/time/time.c | 2 +-
net/xfrm/xfrm_state.c | 2 --
net/xfrm/xfrm_user.c | 2 --
tools/objtool/check.c | 38 ++++++++++++++++++----
tools/objtool/check.h | 4 +--
tools/objtool/elf.c | 3 +-
tools/objtool/elf.h | 3 +-
47 files changed, 198 insertions(+), 171 deletions(-)
^ permalink raw reply [flat|nested] 547+ messages in thread
* Re: [GIT PULL] x86 fixes
2018-10-20 8:54 Ingo Molnar
@ 2018-10-20 13:28 ` Greg Kroah-Hartman
0 siblings, 0 replies; 547+ messages in thread
From: Greg Kroah-Hartman @ 2018-10-20 13:28 UTC (permalink / raw)
To: Ingo Molnar
Cc: linux-kernel, Linus Torvalds, Thomas Gleixner, Borislav Petkov,
Peter Zijlstra, Andrew Morton
On Sat, Oct 20, 2018 at 10:54:25AM +0200, Ingo Molnar wrote:
> Greg,
>
> Please pull the latest x86-urgent-for-linus git tree from:
>
> git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus
Now merged, thanks.
greg k-h
^ permalink raw reply [flat|nested] 547+ messages in thread
* [GIT PULL] x86 fixes
@ 2018-10-20 8:54 Ingo Molnar
2018-10-20 13:28 ` Greg Kroah-Hartman
0 siblings, 1 reply; 547+ messages in thread
From: Ingo Molnar @ 2018-10-20 8:54 UTC (permalink / raw)
To: Greg Kroah-Hartman
Cc: linux-kernel, Linus Torvalds, Thomas Gleixner, Borislav Petkov,
Peter Zijlstra, Andrew Morton
Greg,
Please pull the latest x86-urgent-for-linus git tree from:
git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus
# HEAD: 485734f3fc77c1eb77ffe138c027b9a4bf0178f3 x86/swiotlb: Enable swiotlb for > 4GiG RAM on 32-bit kernels
It's 4 misc fixes, 3 build warning fixes and 3 comment fixes.
In hindsight I'd have left out the 3 comment fixes to make the pull
request look less scary at such a late point in the cycle. :-/
Thanks,
Ingo
------------------>
Andy Lutomirski (1):
x86/entry/64: Further improve paranoid_entry comments
Christoph Hellwig (1):
x86/swiotlb: Enable swiotlb for > 4GiG RAM on 32-bit kernels
Dave Hansen (1):
x86/entry: Add some paranoid entry/exit CR3 handling comments
Jan Kiszka (1):
x86/entry/32: Clear the CS high bits
Nathan Chancellor (2):
x86/time: Correct the attribute on jiffies' definition
x86/boot: Add -Wno-pointer-sign to KBUILD_CFLAGS
Peter Zijlstra (2):
x86/tsc: Force inlining of cyc2ns bits
x86/percpu: Fix this_cpu_read()
Sebastian Andrzej Siewior (2):
x86/fpu: Remove second definition of fpu in __fpu__restore_sig()
x86/fpu: Fix i486 + no387 boot crash by only saving FPU registers on context switch if there is an FPU
arch/x86/boot/compressed/Makefile | 1 +
arch/x86/entry/entry_32.S | 13 +++++++------
arch/x86/entry/entry_64.S | 13 +++++++++++++
arch/x86/include/asm/fpu/internal.h | 2 +-
arch/x86/include/asm/percpu.h | 8 ++++----
arch/x86/kernel/fpu/signal.c | 1 -
arch/x86/kernel/pci-swiotlb.c | 2 --
arch/x86/kernel/time.c | 2 +-
arch/x86/kernel/tsc.c | 6 +++---
9 files changed, 30 insertions(+), 18 deletions(-)
diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index 28764dacf018..466f66c8a7f8 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -37,6 +37,7 @@ KBUILD_CFLAGS += $(call cc-option,-ffreestanding)
KBUILD_CFLAGS += $(call cc-option,-fno-stack-protector)
KBUILD_CFLAGS += $(call cc-disable-warning, address-of-packed-member)
KBUILD_CFLAGS += $(call cc-disable-warning, gnu)
+KBUILD_CFLAGS += -Wno-pointer-sign
KBUILD_AFLAGS := $(KBUILD_CFLAGS) -D__ASSEMBLY__
GCOV_PROFILE := n
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index 2767c625a52c..fbbf1ba57ec6 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -389,6 +389,13 @@
* that register for the time this macro runs
*/
+ /*
+ * The high bits of the CS dword (__csh) are used for
+ * CS_FROM_ENTRY_STACK and CS_FROM_USER_CR3. Clear them in case
+ * hardware didn't do this for us.
+ */
+ andl $(0x0000ffff), PT_CS(%esp)
+
/* Are we on the entry stack? Bail out if not! */
movl PER_CPU_VAR(cpu_entry_area), %ecx
addl $CPU_ENTRY_AREA_entry_stack + SIZEOF_entry_stack, %ecx
@@ -407,12 +414,6 @@
/* Load top of task-stack into %edi */
movl TSS_entry2task_stack(%edi), %edi
- /*
- * Clear unused upper bits of the dword containing the word-sized CS
- * slot in pt_regs in case hardware didn't clear it for us.
- */
- andl $(0x0000ffff), PT_CS(%esp)
-
/* Special case - entry from kernel mode via entry stack */
#ifdef CONFIG_VM86
movl PT_EFLAGS(%esp), %ecx # mix EFLAGS and CS
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 957dfb693ecc..f95dcb209fdf 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -1187,6 +1187,16 @@ ENTRY(paranoid_entry)
xorl %ebx, %ebx
1:
+ /*
+ * Always stash CR3 in %r14. This value will be restored,
+ * verbatim, at exit. Needed if paranoid_entry interrupted
+ * another entry that already switched to the user CR3 value
+ * but has not yet returned to userspace.
+ *
+ * This is also why CS (stashed in the "iret frame" by the
+ * hardware at entry) can not be used: this may be a return
+ * to kernel code, but with a user CR3 value.
+ */
SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg=%rax save_reg=%r14
ret
@@ -1211,11 +1221,13 @@ ENTRY(paranoid_exit)
testl %ebx, %ebx /* swapgs needed? */
jnz .Lparanoid_exit_no_swapgs
TRACE_IRQS_IRETQ
+ /* Always restore stashed CR3 value (see paranoid_entry) */
RESTORE_CR3 scratch_reg=%rbx save_reg=%r14
SWAPGS_UNSAFE_STACK
jmp .Lparanoid_exit_restore
.Lparanoid_exit_no_swapgs:
TRACE_IRQS_IRETQ_DEBUG
+ /* Always restore stashed CR3 value (see paranoid_entry) */
RESTORE_CR3 scratch_reg=%rbx save_reg=%r14
.Lparanoid_exit_restore:
jmp restore_regs_and_return_to_kernel
@@ -1626,6 +1638,7 @@ end_repeat_nmi:
movq $-1, %rsi
call do_nmi
+ /* Always restore stashed CR3 value (see paranoid_entry) */
RESTORE_CR3 scratch_reg=%r15 save_reg=%r14
testl %ebx, %ebx /* swapgs needed? */
diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h
index a38bf5a1e37a..69dcdf195b61 100644
--- a/arch/x86/include/asm/fpu/internal.h
+++ b/arch/x86/include/asm/fpu/internal.h
@@ -528,7 +528,7 @@ static inline void fpregs_activate(struct fpu *fpu)
static inline void
switch_fpu_prepare(struct fpu *old_fpu, int cpu)
{
- if (old_fpu->initialized) {
+ if (static_cpu_has(X86_FEATURE_FPU) && old_fpu->initialized) {
if (!copy_fpregs_to_fpstate(old_fpu))
old_fpu->last_cpu = -1;
else
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index e9202a0de8f0..1a19d11cfbbd 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -185,22 +185,22 @@ do { \
typeof(var) pfo_ret__; \
switch (sizeof(var)) { \
case 1: \
- asm(op "b "__percpu_arg(1)",%0" \
+ asm volatile(op "b "__percpu_arg(1)",%0"\
: "=q" (pfo_ret__) \
: "m" (var)); \
break; \
case 2: \
- asm(op "w "__percpu_arg(1)",%0" \
+ asm volatile(op "w "__percpu_arg(1)",%0"\
: "=r" (pfo_ret__) \
: "m" (var)); \
break; \
case 4: \
- asm(op "l "__percpu_arg(1)",%0" \
+ asm volatile(op "l "__percpu_arg(1)",%0"\
: "=r" (pfo_ret__) \
: "m" (var)); \
break; \
case 8: \
- asm(op "q "__percpu_arg(1)",%0" \
+ asm volatile(op "q "__percpu_arg(1)",%0"\
: "=r" (pfo_ret__) \
: "m" (var)); \
break; \
diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c
index 23f1691670b6..61a949d84dfa 100644
--- a/arch/x86/kernel/fpu/signal.c
+++ b/arch/x86/kernel/fpu/signal.c
@@ -314,7 +314,6 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
* thread's fpu state, reconstruct fxstate from the fsave
* header. Validate and sanitize the copied state.
*/
- struct fpu *fpu = &tsk->thread.fpu;
struct user_i387_ia32_struct env;
int err = 0;
diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c
index 661583662430..71c0b01d93b1 100644
--- a/arch/x86/kernel/pci-swiotlb.c
+++ b/arch/x86/kernel/pci-swiotlb.c
@@ -42,10 +42,8 @@ IOMMU_INIT_FINISH(pci_swiotlb_detect_override,
int __init pci_swiotlb_detect_4gb(void)
{
/* don't initialize swiotlb if iommu=off (no_iommu=1) */
-#ifdef CONFIG_X86_64
if (!no_iommu && max_possible_pfn > MAX_DMA32_PFN)
swiotlb = 1;
-#endif
/*
* If SME is active then swiotlb will be set to 1 so that bounce
diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c
index be01328eb755..fddaefc51fb6 100644
--- a/arch/x86/kernel/time.c
+++ b/arch/x86/kernel/time.c
@@ -25,7 +25,7 @@
#include <asm/time.h>
#ifdef CONFIG_X86_64
-__visible volatile unsigned long jiffies __cacheline_aligned = INITIAL_JIFFIES;
+__visible volatile unsigned long jiffies __cacheline_aligned_in_smp = INITIAL_JIFFIES;
#endif
unsigned long profile_pc(struct pt_regs *regs)
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index b52bd2b6cdb4..6d5dc5dabfd7 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -58,7 +58,7 @@ struct cyc2ns {
static DEFINE_PER_CPU_ALIGNED(struct cyc2ns, cyc2ns);
-void cyc2ns_read_begin(struct cyc2ns_data *data)
+void __always_inline cyc2ns_read_begin(struct cyc2ns_data *data)
{
int seq, idx;
@@ -75,7 +75,7 @@ void cyc2ns_read_begin(struct cyc2ns_data *data)
} while (unlikely(seq != this_cpu_read(cyc2ns.seq.sequence)));
}
-void cyc2ns_read_end(void)
+void __always_inline cyc2ns_read_end(void)
{
preempt_enable_notrace();
}
@@ -104,7 +104,7 @@ void cyc2ns_read_end(void)
* -johnstul@us.ibm.com "math is hard, lets go shopping!"
*/
-static inline unsigned long long cycles_2_ns(unsigned long long cyc)
+static __always_inline unsigned long long cycles_2_ns(unsigned long long cyc)
{
struct cyc2ns_data data;
unsigned long long ns;
^ permalink raw reply related [flat|nested] 547+ messages in thread
* Re: [GIT PULL] x86 fixes
2018-10-11 9:14 Ingo Molnar
@ 2018-10-11 12:32 ` Greg Kroah-Hartman
0 siblings, 0 replies; 547+ messages in thread
From: Greg Kroah-Hartman @ 2018-10-11 12:32 UTC (permalink / raw)
To: Ingo Molnar
Cc: linux-kernel, Linus Torvalds, Thomas Gleixner, Borislav Petkov,
Peter Zijlstra, Andrew Morton
On Thu, Oct 11, 2018 at 11:14:16AM +0200, Ingo Molnar wrote:
> Greg,
>
> Please pull the latest x86-urgent-for-linus git tree from:
>
> git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus
Now merged, thanks.
greg k-h
^ permalink raw reply [flat|nested] 547+ messages in thread
* [GIT PULL] x86 fixes
@ 2018-10-11 9:14 Ingo Molnar
2018-10-11 12:32 ` Greg Kroah-Hartman
0 siblings, 1 reply; 547+ messages in thread
From: Ingo Molnar @ 2018-10-11 9:14 UTC (permalink / raw)
To: Greg Kroah-Hartman
Cc: linux-kernel, Linus Torvalds, Thomas Gleixner, Borislav Petkov,
Peter Zijlstra, Andrew Morton
Greg,
Please pull the latest x86-urgent-for-linus git tree from:
git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus
# HEAD: 184d47f0fd365108bd06ab26cdb3450b716269fd x86/mm: Avoid VLA in pgd_alloc()
An intel_rdt memory access fix and a VLA fix in pgd_alloc().
Thanks,
Ingo
------------------>
Kees Cook (1):
x86/mm: Avoid VLA in pgd_alloc()
Reinette Chatre (1):
x86/intel_rdt: Fix out-of-bounds memory access in CBM tests
arch/x86/kernel/cpu/intel_rdt.h | 6 ++---
arch/x86/kernel/cpu/intel_rdt_pseudo_lock.c | 20 ++++++++--------
arch/x86/kernel/cpu/intel_rdt_rdtgroup.c | 36 ++++++++++++++++++-----------
arch/x86/mm/pgtable.c | 10 ++++++--
4 files changed, 45 insertions(+), 27 deletions(-)
diff --git a/arch/x86/kernel/cpu/intel_rdt.h b/arch/x86/kernel/cpu/intel_rdt.h
index 285eb3ec4200..3736f6dc9545 100644
--- a/arch/x86/kernel/cpu/intel_rdt.h
+++ b/arch/x86/kernel/cpu/intel_rdt.h
@@ -529,14 +529,14 @@ ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of,
int rdtgroup_schemata_show(struct kernfs_open_file *of,
struct seq_file *s, void *v);
bool rdtgroup_cbm_overlaps(struct rdt_resource *r, struct rdt_domain *d,
- u32 _cbm, int closid, bool exclusive);
+ unsigned long cbm, int closid, bool exclusive);
unsigned int rdtgroup_cbm_to_size(struct rdt_resource *r, struct rdt_domain *d,
- u32 cbm);
+ unsigned long cbm);
enum rdtgrp_mode rdtgroup_mode_by_closid(int closid);
int rdtgroup_tasks_assigned(struct rdtgroup *r);
int rdtgroup_locksetup_enter(struct rdtgroup *rdtgrp);
int rdtgroup_locksetup_exit(struct rdtgroup *rdtgrp);
-bool rdtgroup_cbm_overlaps_pseudo_locked(struct rdt_domain *d, u32 _cbm);
+bool rdtgroup_cbm_overlaps_pseudo_locked(struct rdt_domain *d, unsigned long cbm);
bool rdtgroup_pseudo_locked_in_hierarchy(struct rdt_domain *d);
int rdt_pseudo_lock_init(void);
void rdt_pseudo_lock_release(void);
diff --git a/arch/x86/kernel/cpu/intel_rdt_pseudo_lock.c b/arch/x86/kernel/cpu/intel_rdt_pseudo_lock.c
index 40f3903ae5d9..f8c260d522ca 100644
--- a/arch/x86/kernel/cpu/intel_rdt_pseudo_lock.c
+++ b/arch/x86/kernel/cpu/intel_rdt_pseudo_lock.c
@@ -797,25 +797,27 @@ int rdtgroup_locksetup_exit(struct rdtgroup *rdtgrp)
/**
* rdtgroup_cbm_overlaps_pseudo_locked - Test if CBM or portion is pseudo-locked
* @d: RDT domain
- * @_cbm: CBM to test
+ * @cbm: CBM to test
*
- * @d represents a cache instance and @_cbm a capacity bitmask that is
- * considered for it. Determine if @_cbm overlaps with any existing
+ * @d represents a cache instance and @cbm a capacity bitmask that is
+ * considered for it. Determine if @cbm overlaps with any existing
* pseudo-locked region on @d.
*
- * Return: true if @_cbm overlaps with pseudo-locked region on @d, false
+ * @cbm is unsigned long, even if only 32 bits are used, to make the
+ * bitmap functions work correctly.
+ *
+ * Return: true if @cbm overlaps with pseudo-locked region on @d, false
* otherwise.
*/
-bool rdtgroup_cbm_overlaps_pseudo_locked(struct rdt_domain *d, u32 _cbm)
+bool rdtgroup_cbm_overlaps_pseudo_locked(struct rdt_domain *d, unsigned long cbm)
{
- unsigned long *cbm = (unsigned long *)&_cbm;
- unsigned long *cbm_b;
unsigned int cbm_len;
+ unsigned long cbm_b;
if (d->plr) {
cbm_len = d->plr->r->cache.cbm_len;
- cbm_b = (unsigned long *)&d->plr->cbm;
- if (bitmap_intersects(cbm, cbm_b, cbm_len))
+ cbm_b = d->plr->cbm;
+ if (bitmap_intersects(&cbm, &cbm_b, cbm_len))
return true;
}
return false;
diff --git a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
index 1b8e86a5d5e1..b140c68bc14b 100644
--- a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
+++ b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
@@ -975,33 +975,34 @@ static int rdtgroup_mode_show(struct kernfs_open_file *of,
* is false then overlaps with any resource group or hardware entities
* will be considered.
*
+ * @cbm is unsigned long, even if only 32 bits are used, to make the
+ * bitmap functions work correctly.
+ *
* Return: false if CBM does not overlap, true if it does.
*/
bool rdtgroup_cbm_overlaps(struct rdt_resource *r, struct rdt_domain *d,
- u32 _cbm, int closid, bool exclusive)
+ unsigned long cbm, int closid, bool exclusive)
{
- unsigned long *cbm = (unsigned long *)&_cbm;
- unsigned long *ctrl_b;
enum rdtgrp_mode mode;
+ unsigned long ctrl_b;
u32 *ctrl;
int i;
/* Check for any overlap with regions used by hardware directly */
if (!exclusive) {
- if (bitmap_intersects(cbm,
- (unsigned long *)&r->cache.shareable_bits,
- r->cache.cbm_len))
+ ctrl_b = r->cache.shareable_bits;
+ if (bitmap_intersects(&cbm, &ctrl_b, r->cache.cbm_len))
return true;
}
/* Check for overlap with other resource groups */
ctrl = d->ctrl_val;
for (i = 0; i < closids_supported(); i++, ctrl++) {
- ctrl_b = (unsigned long *)ctrl;
+ ctrl_b = *ctrl;
mode = rdtgroup_mode_by_closid(i);
if (closid_allocated(i) && i != closid &&
mode != RDT_MODE_PSEUDO_LOCKSETUP) {
- if (bitmap_intersects(cbm, ctrl_b, r->cache.cbm_len)) {
+ if (bitmap_intersects(&cbm, &ctrl_b, r->cache.cbm_len)) {
if (exclusive) {
if (mode == RDT_MODE_EXCLUSIVE)
return true;
@@ -1138,15 +1139,18 @@ static ssize_t rdtgroup_mode_write(struct kernfs_open_file *of,
* computed by first dividing the total cache size by the CBM length to
* determine how many bytes each bit in the bitmask represents. The result
* is multiplied with the number of bits set in the bitmask.
+ *
+ * @cbm is unsigned long, even if only 32 bits are used to make the
+ * bitmap functions work correctly.
*/
unsigned int rdtgroup_cbm_to_size(struct rdt_resource *r,
- struct rdt_domain *d, u32 cbm)
+ struct rdt_domain *d, unsigned long cbm)
{
struct cpu_cacheinfo *ci;
unsigned int size = 0;
int num_b, i;
- num_b = bitmap_weight((unsigned long *)&cbm, r->cache.cbm_len);
+ num_b = bitmap_weight(&cbm, r->cache.cbm_len);
ci = get_cpu_cacheinfo(cpumask_any(&d->cpu_mask));
for (i = 0; i < ci->num_leaves; i++) {
if (ci->info_list[i].level == r->cache_level) {
@@ -2353,6 +2357,7 @@ static int rdtgroup_init_alloc(struct rdtgroup *rdtgrp)
u32 used_b = 0, unused_b = 0;
u32 closid = rdtgrp->closid;
struct rdt_resource *r;
+ unsigned long tmp_cbm;
enum rdtgrp_mode mode;
struct rdt_domain *d;
int i, ret;
@@ -2390,9 +2395,14 @@ static int rdtgroup_init_alloc(struct rdtgroup *rdtgrp)
* modify the CBM based on system availability.
*/
cbm_ensure_valid(&d->new_ctrl, r);
- if (bitmap_weight((unsigned long *) &d->new_ctrl,
- r->cache.cbm_len) <
- r->cache.min_cbm_bits) {
+ /*
+ * Assign the u32 CBM to an unsigned long to ensure
+ * that bitmap_weight() does not access out-of-bound
+ * memory.
+ */
+ tmp_cbm = d->new_ctrl;
+ if (bitmap_weight(&tmp_cbm, r->cache.cbm_len) <
+ r->cache.min_cbm_bits) {
rdt_last_cmd_printf("no space on %s:%d\n",
r->name, d->id);
return -ENOSPC;
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 089e78c4effd..59274e2c1ac4 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -115,6 +115,8 @@ static inline void pgd_list_del(pgd_t *pgd)
#define UNSHARED_PTRS_PER_PGD \
(SHARED_KERNEL_PMD ? KERNEL_PGD_BOUNDARY : PTRS_PER_PGD)
+#define MAX_UNSHARED_PTRS_PER_PGD \
+ max_t(size_t, KERNEL_PGD_BOUNDARY, PTRS_PER_PGD)
static void pgd_set_mm(pgd_t *pgd, struct mm_struct *mm)
@@ -181,6 +183,7 @@ static void pgd_dtor(pgd_t *pgd)
* and initialize the kernel pmds here.
*/
#define PREALLOCATED_PMDS UNSHARED_PTRS_PER_PGD
+#define MAX_PREALLOCATED_PMDS MAX_UNSHARED_PTRS_PER_PGD
/*
* We allocate separate PMDs for the kernel part of the user page-table
@@ -189,6 +192,7 @@ static void pgd_dtor(pgd_t *pgd)
*/
#define PREALLOCATED_USER_PMDS (static_cpu_has(X86_FEATURE_PTI) ? \
KERNEL_PGD_PTRS : 0)
+#define MAX_PREALLOCATED_USER_PMDS KERNEL_PGD_PTRS
void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmd)
{
@@ -210,7 +214,9 @@ void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmd)
/* No need to prepopulate any pagetable entries in non-PAE modes. */
#define PREALLOCATED_PMDS 0
+#define MAX_PREALLOCATED_PMDS 0
#define PREALLOCATED_USER_PMDS 0
+#define MAX_PREALLOCATED_USER_PMDS 0
#endif /* CONFIG_X86_PAE */
static void free_pmds(struct mm_struct *mm, pmd_t *pmds[], int count)
@@ -428,8 +434,8 @@ static inline void _pgd_free(pgd_t *pgd)
pgd_t *pgd_alloc(struct mm_struct *mm)
{
pgd_t *pgd;
- pmd_t *u_pmds[PREALLOCATED_USER_PMDS];
- pmd_t *pmds[PREALLOCATED_PMDS];
+ pmd_t *u_pmds[MAX_PREALLOCATED_USER_PMDS];
+ pmd_t *pmds[MAX_PREALLOCATED_PMDS];
pgd = _pgd_alloc();
^ permalink raw reply related [flat|nested] 547+ messages in thread
* Re: [GIT PULL] x86 fixes
2018-10-05 9:53 Ingo Molnar
@ 2018-10-05 23:06 ` Greg Kroah-Hartman
0 siblings, 0 replies; 547+ messages in thread
From: Greg Kroah-Hartman @ 2018-10-05 23:06 UTC (permalink / raw)
To: Ingo Molnar
Cc: linux-kernel, Linus Torvalds, Thomas Gleixner, Borislav Petkov,
Peter Zijlstra, Andrew Morton, Andy Lutomirski
On Fri, Oct 05, 2018 at 11:53:54AM +0200, Ingo Molnar wrote:
> Greg,
>
> Please pull the latest x86-urgent-for-linus git tree from:
>
> git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus
Now merged, thanks.
greg k-h
^ permalink raw reply [flat|nested] 547+ messages in thread
* [GIT PULL] x86 fixes
@ 2018-10-05 9:53 Ingo Molnar
2018-10-05 23:06 ` Greg Kroah-Hartman
0 siblings, 1 reply; 547+ messages in thread
From: Ingo Molnar @ 2018-10-05 9:53 UTC (permalink / raw)
To: Greg Kroah-Hartman
Cc: linux-kernel, Linus Torvalds, Thomas Gleixner, Borislav Petkov,
Peter Zijlstra, Andrew Morton, Andy Lutomirski
Greg,
Please pull the latest x86-urgent-for-linus git tree from:
git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus
# HEAD: 02e425668f5c9deb42787d10001a3b605993ad15 x86/vdso: Fix vDSO syscall fallback asm constraint regression
Misc fixes:
- fix various vDSO bugs: asm constraints and retpolines
- add vDSO test units to make sure they never re-appear
- fix UV platform TSC initialization bug
- fix build warning on Clang
Thanks,
Ingo
------------------>
Andy Lutomirski (4):
x86/vdso: Fix asm constraints on vDSO syscall fallbacks
selftests/x86: Add clock_gettime() tests to test_vdso
x86/vdso: Only enable vDSO retpolines when enabled and supported
x86/vdso: Fix vDSO syscall fallback asm constraint regression
Mike Travis (2):
x86/platform/uv: Provide is_early_uv_system()
x86/tsc: Fix UV TSC initialization
Nathan Chancellor (1):
x86/cpu/amd: Remove unnecessary parentheses
arch/x86/entry/vdso/Makefile | 16 ++-
arch/x86/entry/vdso/vclock_gettime.c | 26 ++---
arch/x86/include/asm/uv/uv.h | 6 ++
arch/x86/kernel/cpu/amd.c | 2 +-
arch/x86/kernel/tsc.c | 4 +
tools/testing/selftests/x86/test_vdso.c | 172 ++++++++++++++++++++++++++++++++
6 files changed, 211 insertions(+), 15 deletions(-)
diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile
index fa3f439f0a92..141d415a8c80 100644
--- a/arch/x86/entry/vdso/Makefile
+++ b/arch/x86/entry/vdso/Makefile
@@ -68,7 +68,13 @@ $(obj)/vdso-image-%.c: $(obj)/vdso%.so.dbg $(obj)/vdso%.so $(obj)/vdso2c FORCE
CFL := $(PROFILING) -mcmodel=small -fPIC -O2 -fasynchronous-unwind-tables -m64 \
$(filter -g%,$(KBUILD_CFLAGS)) $(call cc-option, -fno-stack-protector) \
-fno-omit-frame-pointer -foptimize-sibling-calls \
- -DDISABLE_BRANCH_PROFILING -DBUILD_VDSO $(RETPOLINE_VDSO_CFLAGS)
+ -DDISABLE_BRANCH_PROFILING -DBUILD_VDSO
+
+ifdef CONFIG_RETPOLINE
+ifneq ($(RETPOLINE_VDSO_CFLAGS),)
+ CFL += $(RETPOLINE_VDSO_CFLAGS)
+endif
+endif
$(vobjs): KBUILD_CFLAGS := $(filter-out $(GCC_PLUGINS_CFLAGS) $(RETPOLINE_CFLAGS),$(KBUILD_CFLAGS)) $(CFL)
@@ -138,7 +144,13 @@ KBUILD_CFLAGS_32 += $(call cc-option, -fno-stack-protector)
KBUILD_CFLAGS_32 += $(call cc-option, -foptimize-sibling-calls)
KBUILD_CFLAGS_32 += -fno-omit-frame-pointer
KBUILD_CFLAGS_32 += -DDISABLE_BRANCH_PROFILING
-KBUILD_CFLAGS_32 += $(RETPOLINE_VDSO_CFLAGS)
+
+ifdef CONFIG_RETPOLINE
+ifneq ($(RETPOLINE_VDSO_CFLAGS),)
+ KBUILD_CFLAGS_32 += $(RETPOLINE_VDSO_CFLAGS)
+endif
+endif
+
$(obj)/vdso32.so.dbg: KBUILD_CFLAGS = $(KBUILD_CFLAGS_32)
$(obj)/vdso32.so.dbg: FORCE \
diff --git a/arch/x86/entry/vdso/vclock_gettime.c b/arch/x86/entry/vdso/vclock_gettime.c
index f19856d95c60..e48ca3afa091 100644
--- a/arch/x86/entry/vdso/vclock_gettime.c
+++ b/arch/x86/entry/vdso/vclock_gettime.c
@@ -43,8 +43,9 @@ extern u8 hvclock_page
notrace static long vdso_fallback_gettime(long clock, struct timespec *ts)
{
long ret;
- asm("syscall" : "=a" (ret) :
- "0" (__NR_clock_gettime), "D" (clock), "S" (ts) : "memory");
+ asm ("syscall" : "=a" (ret), "=m" (*ts) :
+ "0" (__NR_clock_gettime), "D" (clock), "S" (ts) :
+ "memory", "rcx", "r11");
return ret;
}
@@ -52,8 +53,9 @@ notrace static long vdso_fallback_gtod(struct timeval *tv, struct timezone *tz)
{
long ret;
- asm("syscall" : "=a" (ret) :
- "0" (__NR_gettimeofday), "D" (tv), "S" (tz) : "memory");
+ asm ("syscall" : "=a" (ret), "=m" (*tv), "=m" (*tz) :
+ "0" (__NR_gettimeofday), "D" (tv), "S" (tz) :
+ "memory", "rcx", "r11");
return ret;
}
@@ -64,13 +66,13 @@ notrace static long vdso_fallback_gettime(long clock, struct timespec *ts)
{
long ret;
- asm(
+ asm (
"mov %%ebx, %%edx \n"
- "mov %2, %%ebx \n"
+ "mov %[clock], %%ebx \n"
"call __kernel_vsyscall \n"
"mov %%edx, %%ebx \n"
- : "=a" (ret)
- : "0" (__NR_clock_gettime), "g" (clock), "c" (ts)
+ : "=a" (ret), "=m" (*ts)
+ : "0" (__NR_clock_gettime), [clock] "g" (clock), "c" (ts)
: "memory", "edx");
return ret;
}
@@ -79,13 +81,13 @@ notrace static long vdso_fallback_gtod(struct timeval *tv, struct timezone *tz)
{
long ret;
- asm(
+ asm (
"mov %%ebx, %%edx \n"
- "mov %2, %%ebx \n"
+ "mov %[tv], %%ebx \n"
"call __kernel_vsyscall \n"
"mov %%edx, %%ebx \n"
- : "=a" (ret)
- : "0" (__NR_gettimeofday), "g" (tv), "c" (tz)
+ : "=a" (ret), "=m" (*tv), "=m" (*tz)
+ : "0" (__NR_gettimeofday), [tv] "g" (tv), "c" (tz)
: "memory", "edx");
return ret;
}
diff --git a/arch/x86/include/asm/uv/uv.h b/arch/x86/include/asm/uv/uv.h
index a80c0673798f..e60c45fd3679 100644
--- a/arch/x86/include/asm/uv/uv.h
+++ b/arch/x86/include/asm/uv/uv.h
@@ -10,8 +10,13 @@ struct cpumask;
struct mm_struct;
#ifdef CONFIG_X86_UV
+#include <linux/efi.h>
extern enum uv_system_type get_uv_system_type(void);
+static inline bool is_early_uv_system(void)
+{
+ return !((efi.uv_systab == EFI_INVALID_TABLE_ADDR) || !efi.uv_systab);
+}
extern int is_uv_system(void);
extern int is_uv_hubless(void);
extern void uv_cpu_init(void);
@@ -23,6 +28,7 @@ extern const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
#else /* X86_UV */
static inline enum uv_system_type get_uv_system_type(void) { return UV_NONE; }
+static inline bool is_early_uv_system(void) { return 0; }
static inline int is_uv_system(void) { return 0; }
static inline int is_uv_hubless(void) { return 0; }
static inline void uv_cpu_init(void) { }
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 22ab408177b2..eeea634bee0a 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -922,7 +922,7 @@ static void init_amd(struct cpuinfo_x86 *c)
static unsigned int amd_size_cache(struct cpuinfo_x86 *c, unsigned int size)
{
/* AMD errata T13 (order #21922) */
- if ((c->x86 == 6)) {
+ if (c->x86 == 6) {
/* Duron Rev A0 */
if (c->x86_model == 3 && c->x86_stepping == 0)
size = 64;
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 6490f618e096..b52bd2b6cdb4 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -26,6 +26,7 @@
#include <asm/apic.h>
#include <asm/intel-family.h>
#include <asm/i8259.h>
+#include <asm/uv/uv.h>
unsigned int __read_mostly cpu_khz; /* TSC clocks / usec, not used here */
EXPORT_SYMBOL(cpu_khz);
@@ -1433,6 +1434,9 @@ void __init tsc_early_init(void)
{
if (!boot_cpu_has(X86_FEATURE_TSC))
return;
+ /* Don't change UV TSC multi-chassis synchronization */
+ if (is_early_uv_system())
+ return;
if (!determine_cpu_tsc_frequencies(true))
return;
loops_per_jiffy = get_loops_per_jiffy();
diff --git a/tools/testing/selftests/x86/test_vdso.c b/tools/testing/selftests/x86/test_vdso.c
index 235259011704..35edd61d1663 100644
--- a/tools/testing/selftests/x86/test_vdso.c
+++ b/tools/testing/selftests/x86/test_vdso.c
@@ -17,6 +17,7 @@
#include <errno.h>
#include <sched.h>
#include <stdbool.h>
+#include <limits.h>
#ifndef SYS_getcpu
# ifdef __x86_64__
@@ -31,6 +32,14 @@
int nerrs = 0;
+typedef int (*vgettime_t)(clockid_t, struct timespec *);
+
+vgettime_t vdso_clock_gettime;
+
+typedef long (*vgtod_t)(struct timeval *tv, struct timezone *tz);
+
+vgtod_t vdso_gettimeofday;
+
typedef long (*getcpu_t)(unsigned *, unsigned *, void *);
getcpu_t vgetcpu;
@@ -95,6 +104,15 @@ static void fill_function_pointers()
printf("Warning: failed to find getcpu in vDSO\n");
vgetcpu = (getcpu_t) vsyscall_getcpu();
+
+ vdso_clock_gettime = (vgettime_t)dlsym(vdso, "__vdso_clock_gettime");
+ if (!vdso_clock_gettime)
+ printf("Warning: failed to find clock_gettime in vDSO\n");
+
+ vdso_gettimeofday = (vgtod_t)dlsym(vdso, "__vdso_gettimeofday");
+ if (!vdso_gettimeofday)
+ printf("Warning: failed to find gettimeofday in vDSO\n");
+
}
static long sys_getcpu(unsigned * cpu, unsigned * node,
@@ -103,6 +121,16 @@ static long sys_getcpu(unsigned * cpu, unsigned * node,
return syscall(__NR_getcpu, cpu, node, cache);
}
+static inline int sys_clock_gettime(clockid_t id, struct timespec *ts)
+{
+ return syscall(__NR_clock_gettime, id, ts);
+}
+
+static inline int sys_gettimeofday(struct timeval *tv, struct timezone *tz)
+{
+ return syscall(__NR_gettimeofday, tv, tz);
+}
+
static void test_getcpu(void)
{
printf("[RUN]\tTesting getcpu...\n");
@@ -155,10 +183,154 @@ static void test_getcpu(void)
}
}
+static bool ts_leq(const struct timespec *a, const struct timespec *b)
+{
+ if (a->tv_sec != b->tv_sec)
+ return a->tv_sec < b->tv_sec;
+ else
+ return a->tv_nsec <= b->tv_nsec;
+}
+
+static bool tv_leq(const struct timeval *a, const struct timeval *b)
+{
+ if (a->tv_sec != b->tv_sec)
+ return a->tv_sec < b->tv_sec;
+ else
+ return a->tv_usec <= b->tv_usec;
+}
+
+static char const * const clocknames[] = {
+ [0] = "CLOCK_REALTIME",
+ [1] = "CLOCK_MONOTONIC",
+ [2] = "CLOCK_PROCESS_CPUTIME_ID",
+ [3] = "CLOCK_THREAD_CPUTIME_ID",
+ [4] = "CLOCK_MONOTONIC_RAW",
+ [5] = "CLOCK_REALTIME_COARSE",
+ [6] = "CLOCK_MONOTONIC_COARSE",
+ [7] = "CLOCK_BOOTTIME",
+ [8] = "CLOCK_REALTIME_ALARM",
+ [9] = "CLOCK_BOOTTIME_ALARM",
+ [10] = "CLOCK_SGI_CYCLE",
+ [11] = "CLOCK_TAI",
+};
+
+static void test_one_clock_gettime(int clock, const char *name)
+{
+ struct timespec start, vdso, end;
+ int vdso_ret, end_ret;
+
+ printf("[RUN]\tTesting clock_gettime for clock %s (%d)...\n", name, clock);
+
+ if (sys_clock_gettime(clock, &start) < 0) {
+ if (errno == EINVAL) {
+ vdso_ret = vdso_clock_gettime(clock, &vdso);
+ if (vdso_ret == -EINVAL) {
+ printf("[OK]\tNo such clock.\n");
+ } else {
+ printf("[FAIL]\tNo such clock, but __vdso_clock_gettime returned %d\n", vdso_ret);
+ nerrs++;
+ }
+ } else {
+ printf("[WARN]\t clock_gettime(%d) syscall returned error %d\n", clock, errno);
+ }
+ return;
+ }
+
+ vdso_ret = vdso_clock_gettime(clock, &vdso);
+ end_ret = sys_clock_gettime(clock, &end);
+
+ if (vdso_ret != 0 || end_ret != 0) {
+ printf("[FAIL]\tvDSO returned %d, syscall errno=%d\n",
+ vdso_ret, errno);
+ nerrs++;
+ return;
+ }
+
+ printf("\t%llu.%09ld %llu.%09ld %llu.%09ld\n",
+ (unsigned long long)start.tv_sec, start.tv_nsec,
+ (unsigned long long)vdso.tv_sec, vdso.tv_nsec,
+ (unsigned long long)end.tv_sec, end.tv_nsec);
+
+ if (!ts_leq(&start, &vdso) || !ts_leq(&vdso, &end)) {
+ printf("[FAIL]\tTimes are out of sequence\n");
+ nerrs++;
+ }
+}
+
+static void test_clock_gettime(void)
+{
+ for (int clock = 0; clock < sizeof(clocknames) / sizeof(clocknames[0]);
+ clock++) {
+ test_one_clock_gettime(clock, clocknames[clock]);
+ }
+
+ /* Also test some invalid clock ids */
+ test_one_clock_gettime(-1, "invalid");
+ test_one_clock_gettime(INT_MIN, "invalid");
+ test_one_clock_gettime(INT_MAX, "invalid");
+}
+
+static void test_gettimeofday(void)
+{
+ struct timeval start, vdso, end;
+ struct timezone sys_tz, vdso_tz;
+ int vdso_ret, end_ret;
+
+ if (!vdso_gettimeofday)
+ return;
+
+ printf("[RUN]\tTesting gettimeofday...\n");
+
+ if (sys_gettimeofday(&start, &sys_tz) < 0) {
+ printf("[FAIL]\tsys_gettimeofday failed (%d)\n", errno);
+ nerrs++;
+ return;
+ }
+
+ vdso_ret = vdso_gettimeofday(&vdso, &vdso_tz);
+ end_ret = sys_gettimeofday(&end, NULL);
+
+ if (vdso_ret != 0 || end_ret != 0) {
+ printf("[FAIL]\tvDSO returned %d, syscall errno=%d\n",
+ vdso_ret, errno);
+ nerrs++;
+ return;
+ }
+
+ printf("\t%llu.%06ld %llu.%06ld %llu.%06ld\n",
+ (unsigned long long)start.tv_sec, start.tv_usec,
+ (unsigned long long)vdso.tv_sec, vdso.tv_usec,
+ (unsigned long long)end.tv_sec, end.tv_usec);
+
+ if (!tv_leq(&start, &vdso) || !tv_leq(&vdso, &end)) {
+ printf("[FAIL]\tTimes are out of sequence\n");
+ nerrs++;
+ }
+
+ if (sys_tz.tz_minuteswest == vdso_tz.tz_minuteswest &&
+ sys_tz.tz_dsttime == vdso_tz.tz_dsttime) {
+ printf("[OK]\ttimezones match: minuteswest=%d, dsttime=%d\n",
+ sys_tz.tz_minuteswest, sys_tz.tz_dsttime);
+ } else {
+ printf("[FAIL]\ttimezones do not match\n");
+ nerrs++;
+ }
+
+ /* And make sure that passing NULL for tz doesn't crash. */
+ vdso_gettimeofday(&vdso, NULL);
+}
+
int main(int argc, char **argv)
{
fill_function_pointers();
+ test_clock_gettime();
+ test_gettimeofday();
+
+ /*
+ * Test getcpu() last so that, if something goes wrong setting affinity,
+ * we still run the other tests.
+ */
test_getcpu();
return nerrs ? 1 : 0;
^ permalink raw reply related [flat|nested] 547+ messages in thread
* [GIT PULL] x86 fixes
@ 2018-09-15 13:24 Ingo Molnar
0 siblings, 0 replies; 547+ messages in thread
From: Ingo Molnar @ 2018-09-15 13:24 UTC (permalink / raw)
To: Linus Torvalds
Cc: linux-kernel, Thomas Gleixner, Peter Zijlstra, Andrew Morton
Linus,
Please pull the latest x86-urgent-for-linus git tree from:
git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus
# HEAD: 002b87d2aace62b4f3841c3aa43309d2380092be x86/APM: Fix build warning when PROC_FS is not enabled
Misc fixes:
- EFI crash fix
- Xen PV fixes
- do not allow PTI on 2-level 32-bit kernels for now
- documentation fix
Thanks,
Ingo
------------------>
Boris Ostrovsky (1):
x86/EISA: Don't probe EISA bus for Xen PV guests
Guenter Roeck (1):
x86/efi: Load fixmap GDT in efi_call_phys_epilog() before setting %cr3
Joerg Roedel (1):
Revert "x86/mm/legacy: Populate the user page-table with user pgd's"
Juergen Gross (1):
x86/xen: Disable CPU0 hotplug for Xen PV
Randy Dunlap (2):
x86/doc: Fix Documentation/x86/earlyprintk.txt
x86/APM: Fix build warning when PROC_FS is not enabled
Documentation/x86/earlyprintk.txt | 25 +++++++++++++++----------
arch/x86/include/asm/pgtable-2level.h | 9 ---------
arch/x86/kernel/apm_32.c | 2 ++
arch/x86/kernel/eisa.c | 10 ++++++++--
arch/x86/kernel/topology.c | 4 +++-
arch/x86/platform/efi/efi_32.c | 3 +--
security/Kconfig | 2 +-
7 files changed, 30 insertions(+), 25 deletions(-)
diff --git a/Documentation/x86/earlyprintk.txt b/Documentation/x86/earlyprintk.txt
index 688e3eeed21d..46933e06c972 100644
--- a/Documentation/x86/earlyprintk.txt
+++ b/Documentation/x86/earlyprintk.txt
@@ -35,25 +35,25 @@ and two USB cables, connected like this:
( If your system does not list a debug port capability then you probably
won't be able to use the USB debug key. )
- b.) You also need a Netchip USB debug cable/key:
+ b.) You also need a NetChip USB debug cable/key:
http://www.plxtech.com/products/NET2000/NET20DC/default.asp
- This is a small blue plastic connector with two USB connections,
+ This is a small blue plastic connector with two USB connections;
it draws power from its USB connections.
c.) You need a second client/console system with a high speed USB 2.0
port.
- d.) The Netchip device must be plugged directly into the physical
+ d.) The NetChip device must be plugged directly into the physical
debug port on the "host/target" system. You cannot use a USB hub in
between the physical debug port and the "host/target" system.
The EHCI debug controller is bound to a specific physical USB
- port and the Netchip device will only work as an early printk
+ port and the NetChip device will only work as an early printk
device in this port. The EHCI host controllers are electrically
wired such that the EHCI debug controller is hooked up to the
- first physical and there is no way to change this via software.
+ first physical port and there is no way to change this via software.
You can find the physical port through experimentation by trying
each physical port on the system and rebooting. Or you can try
and use lsusb or look at the kernel info messages emitted by the
@@ -65,9 +65,9 @@ and two USB cables, connected like this:
to the hardware vendor, because there is no reason not to wire
this port into one of the physically accessible ports.
- e.) It is also important to note, that many versions of the Netchip
+ e.) It is also important to note, that many versions of the NetChip
device require the "client/console" system to be plugged into the
- right and side of the device (with the product logo facing up and
+ right hand side of the device (with the product logo facing up and
readable left to right). The reason being is that the 5 volt
power supply is taken from only one side of the device and it
must be the side that does not get rebooted.
@@ -81,13 +81,18 @@ and two USB cables, connected like this:
CONFIG_EARLY_PRINTK_DBGP=y
And you need to add the boot command line: "earlyprintk=dbgp".
+
(If you are using Grub, append it to the 'kernel' line in
- /etc/grub.conf)
+ /etc/grub.conf. If you are using Grub2 on a BIOS firmware system,
+ append it to the 'linux' line in /boot/grub2/grub.cfg. If you are
+ using Grub2 on an EFI firmware system, append it to the 'linux'
+ or 'linuxefi' line in /boot/grub2/grub.cfg or
+ /boot/efi/EFI/<distro>/grub.cfg.)
On systems with more than one EHCI debug controller you must
specify the correct EHCI debug controller number. The ordering
comes from the PCI bus enumeration of the EHCI controllers. The
- default with no number argument is "0" the first EHCI debug
+ default with no number argument is "0" or the first EHCI debug
controller. To use the second EHCI debug controller, you would
use the command line: "earlyprintk=dbgp1"
@@ -111,7 +116,7 @@ and two USB cables, connected like this:
see the raw output.
c.) On Nvidia Southbridge based systems: the kernel will try to probe
- and find out which port has debug device connected.
+ and find out which port has a debug device connected.
3. Testing that it works fine:
diff --git a/arch/x86/include/asm/pgtable-2level.h b/arch/x86/include/asm/pgtable-2level.h
index 24c6cf5f16b7..60d0f9015317 100644
--- a/arch/x86/include/asm/pgtable-2level.h
+++ b/arch/x86/include/asm/pgtable-2level.h
@@ -19,9 +19,6 @@ static inline void native_set_pte(pte_t *ptep , pte_t pte)
static inline void native_set_pmd(pmd_t *pmdp, pmd_t pmd)
{
-#ifdef CONFIG_PAGE_TABLE_ISOLATION
- pmd.pud.p4d.pgd = pti_set_user_pgtbl(&pmdp->pud.p4d.pgd, pmd.pud.p4d.pgd);
-#endif
*pmdp = pmd;
}
@@ -61,9 +58,6 @@ static inline pte_t native_ptep_get_and_clear(pte_t *xp)
#ifdef CONFIG_SMP
static inline pmd_t native_pmdp_get_and_clear(pmd_t *xp)
{
-#ifdef CONFIG_PAGE_TABLE_ISOLATION
- pti_set_user_pgtbl(&xp->pud.p4d.pgd, __pgd(0));
-#endif
return __pmd(xchg((pmdval_t *)xp, 0));
}
#else
@@ -73,9 +67,6 @@ static inline pmd_t native_pmdp_get_and_clear(pmd_t *xp)
#ifdef CONFIG_SMP
static inline pud_t native_pudp_get_and_clear(pud_t *xp)
{
-#ifdef CONFIG_PAGE_TABLE_ISOLATION
- pti_set_user_pgtbl(&xp->p4d.pgd, __pgd(0));
-#endif
return __pud(xchg((pudval_t *)xp, 0));
}
#else
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index ec00d1ff5098..f7151cd03cb0 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -1640,6 +1640,7 @@ static int do_open(struct inode *inode, struct file *filp)
return 0;
}
+#ifdef CONFIG_PROC_FS
static int proc_apm_show(struct seq_file *m, void *v)
{
unsigned short bx;
@@ -1719,6 +1720,7 @@ static int proc_apm_show(struct seq_file *m, void *v)
units);
return 0;
}
+#endif
static int apm(void *unused)
{
diff --git a/arch/x86/kernel/eisa.c b/arch/x86/kernel/eisa.c
index f260e452e4f8..e8c8c5d78dbd 100644
--- a/arch/x86/kernel/eisa.c
+++ b/arch/x86/kernel/eisa.c
@@ -7,11 +7,17 @@
#include <linux/eisa.h>
#include <linux/io.h>
+#include <xen/xen.h>
+
static __init int eisa_bus_probe(void)
{
- void __iomem *p = ioremap(0x0FFFD9, 4);
+ void __iomem *p;
+
+ if (xen_pv_domain() && !xen_initial_domain())
+ return 0;
- if (readl(p) == 'E' + ('I'<<8) + ('S'<<16) + ('A'<<24))
+ p = ioremap(0x0FFFD9, 4);
+ if (p && readl(p) == 'E' + ('I' << 8) + ('S' << 16) + ('A' << 24))
EISA_bus = 1;
iounmap(p);
return 0;
diff --git a/arch/x86/kernel/topology.c b/arch/x86/kernel/topology.c
index 12cbe2b88c0f..738bf42b0218 100644
--- a/arch/x86/kernel/topology.c
+++ b/arch/x86/kernel/topology.c
@@ -111,8 +111,10 @@ int arch_register_cpu(int num)
/*
* Currently CPU0 is only hotpluggable on Intel platforms. Other
* vendors can add hotplug support later.
+ * Xen PV guests don't support CPU0 hotplug at all.
*/
- if (c->x86_vendor != X86_VENDOR_INTEL)
+ if (c->x86_vendor != X86_VENDOR_INTEL ||
+ boot_cpu_has(X86_FEATURE_XENPV))
cpu0_hotpluggable = 0;
/*
diff --git a/arch/x86/platform/efi/efi_32.c b/arch/x86/platform/efi/efi_32.c
index 05ca14222463..9959657127f4 100644
--- a/arch/x86/platform/efi/efi_32.c
+++ b/arch/x86/platform/efi/efi_32.c
@@ -85,10 +85,9 @@ pgd_t * __init efi_call_phys_prolog(void)
void __init efi_call_phys_epilog(pgd_t *save_pgd)
{
+ load_fixmap_gdt(0);
load_cr3(save_pgd);
__flush_tlb_all();
-
- load_fixmap_gdt(0);
}
void __init efi_runtime_update_mappings(void)
diff --git a/security/Kconfig b/security/Kconfig
index 27d8b2688f75..d9aa521b5206 100644
--- a/security/Kconfig
+++ b/security/Kconfig
@@ -57,7 +57,7 @@ config SECURITY_NETWORK
config PAGE_TABLE_ISOLATION
bool "Remove the kernel mapping in user mode"
default y
- depends on X86 && !UML
+ depends on (X86_64 || X86_PAE) && !UML
help
This feature reduces the number of hardware side channels by
ensuring that the majority of kernel addresses are not mapped
^ permalink raw reply related [flat|nested] 547+ messages in thread
* [GIT PULL] x86 fixes
@ 2018-07-30 17:59 Ingo Molnar
0 siblings, 0 replies; 547+ messages in thread
From: Ingo Molnar @ 2018-07-30 17:59 UTC (permalink / raw)
To: Linus Torvalds
Cc: linux-kernel, Thomas Gleixner, Peter Zijlstra, Andrew Morton
Linus,
Please pull the latest x86-urgent-for-linus git tree from:
git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus
# HEAD: 92a4728608a8fd228c572bc8ff50dd98aa0ddf2a x86/boot: Fix if_changed build flip/flop bug
Misc fixes:
- a build race fix
- a Xen entry fix
- a TSC_DEADLINE quirk future-proofing fix
Thanks,
Ingo
------------------>
Andy Lutomirski (1):
x86/entry/64: Remove %ebx handling from error_entry/exit
Kees Cook (1):
x86/boot: Fix if_changed build flip/flop bug
Len Brown (1):
x86/apic: Future-proof the TSC_DEADLINE quirk for SKX
arch/x86/boot/compressed/Makefile | 8 ++++++--
arch/x86/entry/entry_64.S | 18 ++++--------------
arch/x86/kernel/apic/apic.c | 3 +++
3 files changed, 13 insertions(+), 16 deletions(-)
diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index fa42f895fdde..169c2feda14a 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -106,9 +106,13 @@ define cmd_check_data_rel
done
endef
+# We need to run two commands under "if_changed", so merge them into a
+# single invocation.
+quiet_cmd_check-and-link-vmlinux = LD $@
+ cmd_check-and-link-vmlinux = $(cmd_check_data_rel); $(cmd_ld)
+
$(obj)/vmlinux: $(vmlinux-objs-y) FORCE
- $(call if_changed,check_data_rel)
- $(call if_changed,ld)
+ $(call if_changed,check-and-link-vmlinux)
OBJCOPYFLAGS_vmlinux.bin := -R .comment -S
$(obj)/vmlinux.bin: vmlinux FORCE
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 73a522d53b53..8ae7ffda8f98 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -981,7 +981,7 @@ ENTRY(\sym)
call \do_sym
- jmp error_exit /* %ebx: no swapgs flag */
+ jmp error_exit
.endif
END(\sym)
.endm
@@ -1222,7 +1222,6 @@ END(paranoid_exit)
/*
* Save all registers in pt_regs, and switch GS if needed.
- * Return: EBX=0: came from user mode; EBX=1: otherwise
*/
ENTRY(error_entry)
UNWIND_HINT_FUNC
@@ -1269,7 +1268,6 @@ ENTRY(error_entry)
* for these here too.
*/
.Lerror_kernelspace:
- incl %ebx
leaq native_irq_return_iret(%rip), %rcx
cmpq %rcx, RIP+8(%rsp)
je .Lerror_bad_iret
@@ -1303,28 +1301,20 @@ ENTRY(error_entry)
/*
* Pretend that the exception came from user mode: set up pt_regs
- * as if we faulted immediately after IRET and clear EBX so that
- * error_exit knows that we will be returning to user mode.
+ * as if we faulted immediately after IRET.
*/
mov %rsp, %rdi
call fixup_bad_iret
mov %rax, %rsp
- decl %ebx
jmp .Lerror_entry_from_usermode_after_swapgs
END(error_entry)
-
-/*
- * On entry, EBX is a "return to kernel mode" flag:
- * 1: already in kernel mode, don't need SWAPGS
- * 0: user gsbase is loaded, we need SWAPGS and standard preparation for return to usermode
- */
ENTRY(error_exit)
UNWIND_HINT_REGS
DISABLE_INTERRUPTS(CLBR_ANY)
TRACE_IRQS_OFF
- testl %ebx, %ebx
- jnz retint_kernel
+ testb $3, CS(%rsp)
+ jz retint_kernel
jmp retint_user
END(error_exit)
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 2aabd4cb0e3f..adbda5847b14 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -573,6 +573,9 @@ static u32 skx_deadline_rev(void)
case 0x04: return 0x02000014;
}
+ if (boot_cpu_data.x86_stepping > 4)
+ return 0;
+
return ~0U;
}
^ permalink raw reply related [flat|nested] 547+ messages in thread
* Re: [GIT PULL] x86 fixes
2018-07-02 18:53 ` Linus Torvalds
@ 2018-07-03 7:56 ` Ingo Molnar
0 siblings, 0 replies; 547+ messages in thread
From: Ingo Molnar @ 2018-07-03 7:56 UTC (permalink / raw)
To: Linus Torvalds
Cc: Andy Lutomirski, Linux Kernel Mailing List, Thomas Gleixner,
Peter Zijlstra, Andrew Morton
* Linus Torvalds <torvalds@linux-foundation.org> wrote:
> On Mon, Jul 2, 2018 at 11:48 AM Andy Lutomirski <luto@amacapital.net> wrote:
> >
> > BTR is way more leet than AND!
>
> I stand corrected.
Ok, on that basis I won't try to convert it to AND ;-)
Seriously though, there's two other 32-bit prefix cleanup/micro-speedup changes
I'll queue up later today:
[PATCH v2] x86-64: use 32-bit XOR to zero registers
[PATCH] x86/entry/64: add two more instruction suffixes
I'll Cc: you guys on the commits and maybe you can find something weird (or leet)
in them as well.
Thanks,
Ingo
^ permalink raw reply [flat|nested] 547+ messages in thread
* Re: [GIT PULL] x86 fixes
2018-07-02 18:47 ` Andy Lutomirski
@ 2018-07-02 18:53 ` Linus Torvalds
2018-07-03 7:56 ` Ingo Molnar
0 siblings, 1 reply; 547+ messages in thread
From: Linus Torvalds @ 2018-07-02 18:53 UTC (permalink / raw)
To: Andy Lutomirski
Cc: Ingo Molnar, Linux Kernel Mailing List, Thomas Gleixner,
Peter Zijlstra, Andrew Morton
On Mon, Jul 2, 2018 at 11:48 AM Andy Lutomirski <luto@amacapital.net> wrote:
>
> BTR is way more leet than AND!
I stand corrected.
Linus
^ permalink raw reply [flat|nested] 547+ messages in thread
* Re: [GIT PULL] x86 fixes
2018-06-30 19:01 ` Linus Torvalds
@ 2018-07-02 18:47 ` Andy Lutomirski
2018-07-02 18:53 ` Linus Torvalds
0 siblings, 1 reply; 547+ messages in thread
From: Andy Lutomirski @ 2018-07-02 18:47 UTC (permalink / raw)
To: Linus Torvalds
Cc: Ingo Molnar, Linux Kernel Mailing List, Thomas Gleixner,
Peter Zijlstra, Andrew Morton
On Sat, Jun 30, 2018 at 12:01 PM, Linus Torvalds
<torvalds@linux-foundation.org> wrote:
> On Sat, Jun 30, 2018 at 1:49 AM Ingo Molnar <mingo@kernel.org> wrote:
>>
>> --- a/arch/x86/entry/entry_32.S
>> +++ b/arch/x86/entry/entry_32.S
>> @@ -477,7 +477,7 @@ ENTRY(entry_SYSENTER_32)
>> * whereas POPF does not.)
>> */
>> addl $PT_EFLAGS-PT_DS, %esp /* point esp at pt_regs->flags */
>> - btr $X86_EFLAGS_IF_BIT, (%esp)
>> + btrl $X86_EFLAGS_IF_BIT, (%esp)
>> popfl
>
> Ho humm. Just looking at this patch, my reaction was "why isn't this
> an 'andl $~X86_EFLAGS_IF' instead"?
>
> Yeah, I guess the 'andl' is two bytes longer (due to the 32-bit
> constant - because IF is bit 9, you can't use a byte constant, and you
> don't want to get a partial word write just before the popfl).
>
> But btr is really pretty heavy operation for older CPU's (it's gotten
> better, but 32-bit code presumably cares more about the older CPUs).
>
> It really doesn't matter, I guess. The btr goes back to commit
> c2c9b52fab0d ("x86/entry/32: Restore FLAGS on SYSEXIT").
>
> Andy?
>
BTR is way more leet than AND!
Seriously, though, I've never really tried to shave cycles off the
32-bit code, and BTR is shorter, and I didn't spend more than about
one brain cycle thinking about it. I guess that BTR has a more
complicated flags pipeline (the output flags depend on the input, not
just the output) and probably uses some more complicated ALU circuit
as compared to ANDL.
--Andy
^ permalink raw reply [flat|nested] 547+ messages in thread
* Re: [GIT PULL] x86 fixes
2018-06-30 8:49 Ingo Molnar
@ 2018-06-30 19:01 ` Linus Torvalds
2018-07-02 18:47 ` Andy Lutomirski
0 siblings, 1 reply; 547+ messages in thread
From: Linus Torvalds @ 2018-06-30 19:01 UTC (permalink / raw)
To: Ingo Molnar, Andy Lutomirski
Cc: Linux Kernel Mailing List, Thomas Gleixner, Peter Zijlstra,
Andrew Morton
On Sat, Jun 30, 2018 at 1:49 AM Ingo Molnar <mingo@kernel.org> wrote:
>
> --- a/arch/x86/entry/entry_32.S
> +++ b/arch/x86/entry/entry_32.S
> @@ -477,7 +477,7 @@ ENTRY(entry_SYSENTER_32)
> * whereas POPF does not.)
> */
> addl $PT_EFLAGS-PT_DS, %esp /* point esp at pt_regs->flags */
> - btr $X86_EFLAGS_IF_BIT, (%esp)
> + btrl $X86_EFLAGS_IF_BIT, (%esp)
> popfl
Ho humm. Just looking at this patch, my reaction was "why isn't this
an 'andl $~X86_EFLAGS_IF' instead"?
Yeah, I guess the 'andl' is two bytes longer (due to the 32-bit
constant - because IF is bit 9, you can't use a byte constant, and you
don't want to get a partial word write just before the popfl).
But btr is really pretty heavy operation for older CPU's (it's gotten
better, but 32-bit code presumably cares more about the older CPUs).
It really doesn't matter, I guess. The btr goes back to commit
c2c9b52fab0d ("x86/entry/32: Restore FLAGS on SYSEXIT").
Andy?
Linus
^ permalink raw reply [flat|nested] 547+ messages in thread
* [GIT PULL] x86 fixes
@ 2018-06-30 8:49 Ingo Molnar
2018-06-30 19:01 ` Linus Torvalds
0 siblings, 1 reply; 547+ messages in thread
From: Ingo Molnar @ 2018-06-30 8:49 UTC (permalink / raw)
To: Linus Torvalds
Cc: linux-kernel, Thomas Gleixner, Peter Zijlstra, Andrew Morton
Linus,
Please pull the latest x86-urgent-for-linus git tree from:
git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus
# HEAD: d79d0d8ad0cb3d782b41631dfeac8eb05e414bcd x86/mm: Clean up the printk()s in show_fault_oops()
The biggest diffstat comes from self-test updates, plus there's entry code fixes,
5-level paging related fixes, console debug output fixes, plus misc fixes.
out-of-topic modifications in x86-urgent-for-linus:
-----------------------------------------------------
tools/testing/selftests/x86/sigreturn.c# e8a445dea219: selftests/x86/sigreturn: Do
# ec3480205660: selftests/x86/sigreturn/64:
Thanks,
Ingo
------------------>
Andrey Ryabinin (1):
x86/mm: Don't free P4D table when it is folded at runtime
Andy Lutomirski (3):
x86/entry/64/compat: Fix "x86/entry/64/compat: Preserve r8-r11 in int $0x80"
selftests/x86/sigreturn/64: Fix spurious failures on AMD CPUs
selftests/x86/sigreturn: Do minor cleanups
Dmitry Vyukov (2):
x86/mm: Get rid of KERN_CONT in show_fault_oops()
x86/mm: Clean up the printk()s in show_fault_oops()
Jan Beulich (1):
x86/entry/32: Add explicit 'l' instruction suffix
Kirill A. Shutemov (2):
x86/efi: Fix efi_call_phys_epilog() with CONFIG_X86_5LEVEL=y
x86/mm: Drop unneeded __always_inline for p4d page table helpers
arch/x86/entry/entry_32.S | 2 +-
arch/x86/entry/entry_64_compat.S | 16 ++++-----
arch/x86/include/asm/pgalloc.h | 3 ++
arch/x86/include/asm/pgtable.h | 2 +-
arch/x86/include/asm/pgtable_64.h | 4 +--
arch/x86/mm/fault.c | 21 ++++--------
arch/x86/platform/efi/efi_64.c | 4 +--
tools/testing/selftests/x86/sigreturn.c | 59 ++++++++++++++++++++-------------
8 files changed, 60 insertions(+), 51 deletions(-)
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index 2582881d19ce..c371bfee137a 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -477,7 +477,7 @@ ENTRY(entry_SYSENTER_32)
* whereas POPF does not.)
*/
addl $PT_EFLAGS-PT_DS, %esp /* point esp at pt_regs->flags */
- btr $X86_EFLAGS_IF_BIT, (%esp)
+ btrl $X86_EFLAGS_IF_BIT, (%esp)
popfl
/*
diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
index 9de7f1e1dede..7d0df78db727 100644
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -84,13 +84,13 @@ ENTRY(entry_SYSENTER_compat)
pushq %rdx /* pt_regs->dx */
pushq %rcx /* pt_regs->cx */
pushq $-ENOSYS /* pt_regs->ax */
- pushq %r8 /* pt_regs->r8 */
+ pushq $0 /* pt_regs->r8 = 0 */
xorl %r8d, %r8d /* nospec r8 */
- pushq %r9 /* pt_regs->r9 */
+ pushq $0 /* pt_regs->r9 = 0 */
xorl %r9d, %r9d /* nospec r9 */
- pushq %r10 /* pt_regs->r10 */
+ pushq $0 /* pt_regs->r10 = 0 */
xorl %r10d, %r10d /* nospec r10 */
- pushq %r11 /* pt_regs->r11 */
+ pushq $0 /* pt_regs->r11 = 0 */
xorl %r11d, %r11d /* nospec r11 */
pushq %rbx /* pt_regs->rbx */
xorl %ebx, %ebx /* nospec rbx */
@@ -374,13 +374,13 @@ ENTRY(entry_INT80_compat)
pushq %rcx /* pt_regs->cx */
xorl %ecx, %ecx /* nospec cx */
pushq $-ENOSYS /* pt_regs->ax */
- pushq $0 /* pt_regs->r8 = 0 */
+ pushq %r8 /* pt_regs->r8 */
xorl %r8d, %r8d /* nospec r8 */
- pushq $0 /* pt_regs->r9 = 0 */
+ pushq %r9 /* pt_regs->r9 */
xorl %r9d, %r9d /* nospec r9 */
- pushq $0 /* pt_regs->r10 = 0 */
+ pushq %r10 /* pt_regs->r10*/
xorl %r10d, %r10d /* nospec r10 */
- pushq $0 /* pt_regs->r11 = 0 */
+ pushq %r11 /* pt_regs->r11 */
xorl %r11d, %r11d /* nospec r11 */
pushq %rbx /* pt_regs->rbx */
xorl %ebx, %ebx /* nospec rbx */
diff --git a/arch/x86/include/asm/pgalloc.h b/arch/x86/include/asm/pgalloc.h
index ada6410fd2ec..fbd578daa66e 100644
--- a/arch/x86/include/asm/pgalloc.h
+++ b/arch/x86/include/asm/pgalloc.h
@@ -184,6 +184,9 @@ static inline p4d_t *p4d_alloc_one(struct mm_struct *mm, unsigned long addr)
static inline void p4d_free(struct mm_struct *mm, p4d_t *p4d)
{
+ if (!pgtable_l5_enabled())
+ return;
+
BUG_ON((unsigned long)p4d & (PAGE_SIZE-1));
free_page((unsigned long)p4d);
}
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 99ecde23c3ec..5715647fc4fe 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -898,7 +898,7 @@ static inline unsigned long pgd_page_vaddr(pgd_t pgd)
#define pgd_page(pgd) pfn_to_page(pgd_pfn(pgd))
/* to find an entry in a page-table-directory. */
-static __always_inline p4d_t *p4d_offset(pgd_t *pgd, unsigned long address)
+static inline p4d_t *p4d_offset(pgd_t *pgd, unsigned long address)
{
if (!pgtable_l5_enabled())
return (p4d_t *)pgd;
diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
index 0fdcd21dadbd..3c5385f9a88f 100644
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -216,7 +216,7 @@ static inline pgd_t pti_set_user_pgd(pgd_t *pgdp, pgd_t pgd)
}
#endif
-static __always_inline void native_set_p4d(p4d_t *p4dp, p4d_t p4d)
+static inline void native_set_p4d(p4d_t *p4dp, p4d_t p4d)
{
pgd_t pgd;
@@ -230,7 +230,7 @@ static __always_inline void native_set_p4d(p4d_t *p4dp, p4d_t p4d)
*p4dp = native_make_p4d(native_pgd_val(pgd));
}
-static __always_inline void native_p4d_clear(p4d_t *p4d)
+static inline void native_p4d_clear(p4d_t *p4d)
{
native_set_p4d(p4d, native_make_p4d(0));
}
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 9a84a0d08727..2aafa6ab6103 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -641,11 +641,6 @@ static int is_f00f_bug(struct pt_regs *regs, unsigned long address)
return 0;
}
-static const char nx_warning[] = KERN_CRIT
-"kernel tried to execute NX-protected page - exploit attempt? (uid: %d)\n";
-static const char smep_warning[] = KERN_CRIT
-"unable to execute userspace code (SMEP?) (uid: %d)\n";
-
static void
show_fault_oops(struct pt_regs *regs, unsigned long error_code,
unsigned long address)
@@ -664,20 +659,18 @@ show_fault_oops(struct pt_regs *regs, unsigned long error_code,
pte = lookup_address_in_pgd(pgd, address, &level);
if (pte && pte_present(*pte) && !pte_exec(*pte))
- printk(nx_warning, from_kuid(&init_user_ns, current_uid()));
+ pr_crit("kernel tried to execute NX-protected page - exploit attempt? (uid: %d)\n",
+ from_kuid(&init_user_ns, current_uid()));
if (pte && pte_present(*pte) && pte_exec(*pte) &&
(pgd_flags(*pgd) & _PAGE_USER) &&
(__read_cr4() & X86_CR4_SMEP))
- printk(smep_warning, from_kuid(&init_user_ns, current_uid()));
+ pr_crit("unable to execute userspace code (SMEP?) (uid: %d)\n",
+ from_kuid(&init_user_ns, current_uid()));
}
- printk(KERN_ALERT "BUG: unable to handle kernel ");
- if (address < PAGE_SIZE)
- printk(KERN_CONT "NULL pointer dereference");
- else
- printk(KERN_CONT "paging request");
-
- printk(KERN_CONT " at %px\n", (void *) address);
+ pr_alert("BUG: unable to handle kernel %s at %px\n",
+ address < PAGE_SIZE ? "NULL pointer dereference" : "paging request",
+ (void *)address);
dump_pagetable(address);
}
diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c
index e01f7ceb9e7a..77873ce700ae 100644
--- a/arch/x86/platform/efi/efi_64.c
+++ b/arch/x86/platform/efi/efi_64.c
@@ -166,14 +166,14 @@ void __init efi_call_phys_epilog(pgd_t *save_pgd)
pgd = pgd_offset_k(pgd_idx * PGDIR_SIZE);
set_pgd(pgd_offset_k(pgd_idx * PGDIR_SIZE), save_pgd[pgd_idx]);
- if (!(pgd_val(*pgd) & _PAGE_PRESENT))
+ if (!pgd_present(*pgd))
continue;
for (i = 0; i < PTRS_PER_P4D; i++) {
p4d = p4d_offset(pgd,
pgd_idx * PGDIR_SIZE + i * P4D_SIZE);
- if (!(p4d_val(*p4d) & _PAGE_PRESENT))
+ if (!p4d_present(*p4d))
continue;
pud = (pud_t *)p4d_page_vaddr(*p4d);
diff --git a/tools/testing/selftests/x86/sigreturn.c b/tools/testing/selftests/x86/sigreturn.c
index 246145b84a12..4d9dc3f2fd70 100644
--- a/tools/testing/selftests/x86/sigreturn.c
+++ b/tools/testing/selftests/x86/sigreturn.c
@@ -610,21 +610,41 @@ static int test_valid_sigreturn(int cs_bits, bool use_16bit_ss, int force_ss)
*/
for (int i = 0; i < NGREG; i++) {
greg_t req = requested_regs[i], res = resulting_regs[i];
+
if (i == REG_TRAPNO || i == REG_IP)
continue; /* don't care */
- if (i == REG_SP) {
- printf("\tSP: %llx -> %llx\n", (unsigned long long)req,
- (unsigned long long)res);
+ if (i == REG_SP) {
/*
- * In many circumstances, the high 32 bits of rsp
- * are zeroed. For example, we could be a real
- * 32-bit program, or we could hit any of a number
- * of poorly-documented IRET or segmented ESP
- * oddities. If this happens, it's okay.
+ * If we were using a 16-bit stack segment, then
+ * the kernel is a bit stuck: IRET only restores
+ * the low 16 bits of ESP/RSP if SS is 16-bit.
+ * The kernel uses a hack to restore bits 31:16,
+ * but that hack doesn't help with bits 63:32.
+ * On Intel CPUs, bits 63:32 end up zeroed, and, on
+ * AMD CPUs, they leak the high bits of the kernel
+ * espfix64 stack pointer. There's very little that
+ * the kernel can do about it.
+ *
+ * Similarly, if we are returning to a 32-bit context,
+ * the CPU will often lose the high 32 bits of RSP.
*/
- if (res == (req & 0xFFFFFFFF))
- continue; /* OK; not expected to work */
+
+ if (res == req)
+ continue;
+
+ if (cs_bits != 64 && ((res ^ req) & 0xFFFFFFFF) == 0) {
+ printf("[NOTE]\tSP: %llx -> %llx\n",
+ (unsigned long long)req,
+ (unsigned long long)res);
+ continue;
+ }
+
+ printf("[FAIL]\tSP mismatch: requested 0x%llx; got 0x%llx\n",
+ (unsigned long long)requested_regs[i],
+ (unsigned long long)resulting_regs[i]);
+ nerrs++;
+ continue;
}
bool ignore_reg = false;
@@ -654,25 +674,18 @@ static int test_valid_sigreturn(int cs_bits, bool use_16bit_ss, int force_ss)
#endif
/* Sanity check on the kernel */
- if (i == REG_CX && requested_regs[i] != resulting_regs[i]) {
+ if (i == REG_CX && req != res) {
printf("[FAIL]\tCX (saved SP) mismatch: requested 0x%llx; got 0x%llx\n",
- (unsigned long long)requested_regs[i],
- (unsigned long long)resulting_regs[i]);
+ (unsigned long long)req,
+ (unsigned long long)res);
nerrs++;
continue;
}
- if (requested_regs[i] != resulting_regs[i] && !ignore_reg) {
- /*
- * SP is particularly interesting here. The
- * usual cause of failures is that we hit the
- * nasty IRET case of returning to a 16-bit SS,
- * in which case bits 16:31 of the *kernel*
- * stack pointer persist in ESP.
- */
+ if (req != res && !ignore_reg) {
printf("[FAIL]\tReg %d mismatch: requested 0x%llx; got 0x%llx\n",
- i, (unsigned long long)requested_regs[i],
- (unsigned long long)resulting_regs[i]);
+ i, (unsigned long long)req,
+ (unsigned long long)res);
nerrs++;
}
}
^ permalink raw reply related [flat|nested] 547+ messages in thread
* [GIT PULL] x86 fixes
@ 2018-03-31 10:36 Ingo Molnar
0 siblings, 0 replies; 547+ messages in thread
From: Ingo Molnar @ 2018-03-31 10:36 UTC (permalink / raw)
To: Linus Torvalds
Cc: linux-kernel, Thomas Gleixner, Peter Zijlstra, Andrew Morton
Linus,
Please pull the latest x86-urgent-for-linus git tree from:
git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus
# HEAD: bd47a85acd727e27b7283daff557865ad04c59f6 x86/platform/UV: Fix critical UV MMR address error
Two UV platform fixes, and a kbuild fix.
Thanks,
Ingo
------------------>
Andrew Banman (1):
x86/platform/uv/BAU: Add APIC idt entry
Sven Wegener (1):
x86/purgatory: Avoid creating stray .<pid>.d files, remove -MD from KBUILD_CFLAGS
mike.travis@hpe.com (1):
x86/platform/UV: Fix critical UV MMR address error
arch/x86/include/asm/hw_irq.h | 1 +
arch/x86/include/asm/uv/uv_mmrs.h | 2 +-
arch/x86/kernel/idt.c | 3 +++
arch/x86/platform/uv/tlb_uv.c | 2 --
arch/x86/purgatory/Makefile | 2 +-
5 files changed, 6 insertions(+), 4 deletions(-)
diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
index 2851077b6051..32e666e1231e 100644
--- a/arch/x86/include/asm/hw_irq.h
+++ b/arch/x86/include/asm/hw_irq.h
@@ -36,6 +36,7 @@ extern asmlinkage void kvm_posted_intr_wakeup_ipi(void);
extern asmlinkage void kvm_posted_intr_nested_ipi(void);
extern asmlinkage void error_interrupt(void);
extern asmlinkage void irq_work_interrupt(void);
+extern asmlinkage void uv_bau_message_intr1(void);
extern asmlinkage void spurious_interrupt(void);
extern asmlinkage void thermal_interrupt(void);
diff --git a/arch/x86/include/asm/uv/uv_mmrs.h b/arch/x86/include/asm/uv/uv_mmrs.h
index ecb9ddef128f..62c79e26a59a 100644
--- a/arch/x86/include/asm/uv/uv_mmrs.h
+++ b/arch/x86/include/asm/uv/uv_mmrs.h
@@ -3833,7 +3833,7 @@ union uvh_rh_gam_mmioh_overlay_config0_mmr_u {
#define UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR uv_undefined("UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR")
#define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR uv_undefined("UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR")
#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR 0x1603000UL
-#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR 0x483000UL
+#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR 0x484000UL
#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR ( \
is_uv1_hub() ? UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR : \
is_uv2_hub() ? UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR : \
diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c
index 56d99be3706a..5d039c848bd9 100644
--- a/arch/x86/kernel/idt.c
+++ b/arch/x86/kernel/idt.c
@@ -140,6 +140,9 @@ static const __initconst struct idt_data apic_idts[] = {
# ifdef CONFIG_IRQ_WORK
INTG(IRQ_WORK_VECTOR, irq_work_interrupt),
# endif
+#ifdef CONFIG_X86_UV
+ INTG(UV_BAU_MESSAGE, uv_bau_message_intr1),
+#endif
INTG(SPURIOUS_APIC_VECTOR, spurious_interrupt),
INTG(ERROR_APIC_VECTOR, error_interrupt),
#endif
diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c
index db77e087adaf..b36caae0fb2f 100644
--- a/arch/x86/platform/uv/tlb_uv.c
+++ b/arch/x86/platform/uv/tlb_uv.c
@@ -2255,8 +2255,6 @@ static int __init uv_bau_init(void)
init_uvhub(uvhub, vector, uv_base_pnode);
}
- alloc_intr_gate(vector, uv_bau_message_intr1);
-
for_each_possible_blade(uvhub) {
if (uv_blade_nr_possible_cpus(uvhub)) {
unsigned long val;
diff --git a/arch/x86/purgatory/Makefile b/arch/x86/purgatory/Makefile
index 2f15a2ac4209..d70c15de417b 100644
--- a/arch/x86/purgatory/Makefile
+++ b/arch/x86/purgatory/Makefile
@@ -16,7 +16,7 @@ KCOV_INSTRUMENT := n
# in turn leaves some undefined symbols like __fentry__ in purgatory and not
# sure how to relocate those. Like kexec-tools, use custom flags.
-KBUILD_CFLAGS := -fno-strict-aliasing -Wall -Wstrict-prototypes -fno-zero-initialized-in-bss -fno-builtin -ffreestanding -c -MD -Os -mcmodel=large
+KBUILD_CFLAGS := -fno-strict-aliasing -Wall -Wstrict-prototypes -fno-zero-initialized-in-bss -fno-builtin -ffreestanding -c -Os -mcmodel=large
KBUILD_CFLAGS += -m$(BITS)
KBUILD_CFLAGS += $(call cc-option,-fno-PIE)
^ permalink raw reply related [flat|nested] 547+ messages in thread
* [GIT PULL] x86 fixes
@ 2018-02-15 0:45 Ingo Molnar
0 siblings, 0 replies; 547+ messages in thread
From: Ingo Molnar @ 2018-02-15 0:45 UTC (permalink / raw)
To: Linus Torvalds
Cc: linux-kernel, Thomas Gleixner, H. Peter Anvin, Peter Zijlstra,
Andrew Morton
Linus,
Please pull the latest x86-urgent-for-linus git tree from:
git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus
# HEAD: fd0e786d9d09024f67bd71ec094b110237dc3840 x86/mm, mm/hwpoison: Don't unconditionally unmap kernel 1:1 pages
Misc fixes all across the map:
- /proc/kcore vsyscall related fixes
- LTO fix
- build warning fix
- CPU hotplug fix
- Kconfig NR_CPUS cleanups
- cpu_has() cleanups/robustification
- .gitignore fix
- memory-failure unmapping fix
- UV platform fix
out-of-topic modifications in x86-urgent-for-linus:
-----------------------------------------------------
fs/proc/kcore.c # 595dd46ebfc1: vfs/proc/kcore, x86/mm/kcore
include/linux/kcore.h # 595dd46ebfc1: vfs/proc/kcore, x86/mm/kcore
include/linux/mm_inline.h # fd0e786d9d09: x86/mm, mm/hwpoison: Don't u
mm/memory-failure.c # fd0e786d9d09: x86/mm, mm/hwpoison: Don't u
Thanks,
Ingo
------------------>
Arnd Bergmann (1):
x86/error_inject: Make just_return_func() globally visible
Borislav Petkov (1):
x86/MCE: Fix build warning introduced by "x86: do not use print_symbol()"
Ingo Molnar (1):
x86/Kconfig: Further simplify the NR_CPUS config
Jia Zhang (2):
vfs/proc/kcore, x86/mm/kcore: Fix SMAP fault when dumping vsyscall user page
x86/mm/kcore: Add vsyscall page to /proc/kcore conditionally
Masayoshi Mizuma (1):
x86/smpboot: Fix uncore_pci_remove() indexing bug when hot-removing a physical CPU
Peter Zijlstra (2):
x86/cpufeature: Reindent _static_cpu_has()
x86/cpufeature: Update _static_cpu_has() to use all named variables
Progyan Bhattacharya (1):
x86/build: Add arch/x86/tools/insn_decoder_test to .gitignore
Randy Dunlap (1):
x86/Kconfig: Simplify NR_CPUS config
Tony Luck (1):
x86/mm, mm/hwpoison: Don't unconditionally unmap kernel 1:1 pages
mike.travis@hpe.com (1):
x86/platform/UV: Fix GAM Range Table entries less than 1GB
arch/x86/.gitignore | 1 +
arch/x86/Kconfig | 75 ++++++++++++++++++++++-------
arch/x86/include/asm/cpufeature.h | 79 ++++++++++++++++---------------
arch/x86/include/asm/page_64.h | 4 --
arch/x86/kernel/apic/x2apic_uv_x.c | 15 ++++--
arch/x86/kernel/cpu/mcheck/mce-internal.h | 15 ++++++
arch/x86/kernel/cpu/mcheck/mce.c | 19 +++++---
arch/x86/kernel/smpboot.c | 1 -
arch/x86/lib/error-inject.c | 1 +
arch/x86/mm/init_64.c | 4 +-
fs/proc/kcore.c | 4 ++
include/linux/kcore.h | 1 +
include/linux/mm_inline.h | 6 ---
mm/memory-failure.c | 2 -
14 files changed, 146 insertions(+), 81 deletions(-)
diff --git a/arch/x86/.gitignore b/arch/x86/.gitignore
index aff152c87cf4..5a82bac5e0bc 100644
--- a/arch/x86/.gitignore
+++ b/arch/x86/.gitignore
@@ -1,6 +1,7 @@
boot/compressed/vmlinux
tools/test_get_len
tools/insn_sanity
+tools/insn_decoder_test
purgatory/kexec-purgatory.c
purgatory/purgatory.ro
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 63bf349b2b24..a528c14d45a5 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -423,12 +423,6 @@ config X86_MPPARSE
For old smp systems that do not have proper acpi support. Newer systems
(esp with 64bit cpus) with acpi support, MADT and DSDT will override it
-config X86_BIGSMP
- bool "Support for big SMP systems with more than 8 CPUs"
- depends on X86_32 && SMP
- ---help---
- This option is needed for the systems that have more than 8 CPUs
-
config GOLDFISH
def_bool y
depends on X86_GOLDFISH
@@ -460,6 +454,12 @@ config INTEL_RDT
Say N if unsure.
if X86_32
+config X86_BIGSMP
+ bool "Support for big SMP systems with more than 8 CPUs"
+ depends on SMP
+ ---help---
+ This option is needed for the systems that have more than 8 CPUs
+
config X86_EXTENDED_PLATFORM
bool "Support for extended (non-PC) x86 platforms"
default y
@@ -949,25 +949,66 @@ config MAXSMP
Enable maximum number of CPUS and NUMA Nodes for this architecture.
If unsure, say N.
+#
+# The maximum number of CPUs supported:
+#
+# The main config value is NR_CPUS, which defaults to NR_CPUS_DEFAULT,
+# and which can be configured interactively in the
+# [NR_CPUS_RANGE_BEGIN ... NR_CPUS_RANGE_END] range.
+#
+# The ranges are different on 32-bit and 64-bit kernels, depending on
+# hardware capabilities and scalability features of the kernel.
+#
+# ( If MAXSMP is enabled we just use the highest possible value and disable
+# interactive configuration. )
+#
+
+config NR_CPUS_RANGE_BEGIN
+ int
+ default NR_CPUS_RANGE_END if MAXSMP
+ default 1 if !SMP
+ default 2
+
+config NR_CPUS_RANGE_END
+ int
+ depends on X86_32
+ default 64 if SMP && X86_BIGSMP
+ default 8 if SMP && !X86_BIGSMP
+ default 1 if !SMP
+
+config NR_CPUS_RANGE_END
+ int
+ depends on X86_64
+ default 8192 if SMP && ( MAXSMP || CPUMASK_OFFSTACK)
+ default 512 if SMP && (!MAXSMP && !CPUMASK_OFFSTACK)
+ default 1 if !SMP
+
+config NR_CPUS_DEFAULT
+ int
+ depends on X86_32
+ default 32 if X86_BIGSMP
+ default 8 if SMP
+ default 1 if !SMP
+
+config NR_CPUS_DEFAULT
+ int
+ depends on X86_64
+ default 8192 if MAXSMP
+ default 64 if SMP
+ default 1 if !SMP
+
config NR_CPUS
int "Maximum number of CPUs" if SMP && !MAXSMP
- range 2 8 if SMP && X86_32 && !X86_BIGSMP
- range 2 64 if SMP && X86_32 && X86_BIGSMP
- range 2 512 if SMP && !MAXSMP && !CPUMASK_OFFSTACK && X86_64
- range 2 8192 if SMP && !MAXSMP && CPUMASK_OFFSTACK && X86_64
- default "1" if !SMP
- default "8192" if MAXSMP
- default "32" if SMP && X86_BIGSMP
- default "8" if SMP && X86_32
- default "64" if SMP
+ range NR_CPUS_RANGE_BEGIN NR_CPUS_RANGE_END
+ default NR_CPUS_DEFAULT
---help---
This allows you to specify the maximum number of CPUs which this
kernel will support. If CPUMASK_OFFSTACK is enabled, the maximum
supported value is 8192, otherwise the maximum value is 512. The
minimum value which makes sense is 2.
- This is purely to save memory - each supported CPU adds
- approximately eight kilobytes to the kernel image.
+ This is purely to save memory: each supported CPU adds about 8KB
+ to the kernel image.
config SCHED_SMT
bool "SMT (Hyperthreading) scheduler support"
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 70eddb3922ff..736771c9822e 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -148,45 +148,46 @@ extern void clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int bit);
*/
static __always_inline __pure bool _static_cpu_has(u16 bit)
{
- asm_volatile_goto("1: jmp 6f\n"
- "2:\n"
- ".skip -(((5f-4f) - (2b-1b)) > 0) * "
- "((5f-4f) - (2b-1b)),0x90\n"
- "3:\n"
- ".section .altinstructions,\"a\"\n"
- " .long 1b - .\n" /* src offset */
- " .long 4f - .\n" /* repl offset */
- " .word %P1\n" /* always replace */
- " .byte 3b - 1b\n" /* src len */
- " .byte 5f - 4f\n" /* repl len */
- " .byte 3b - 2b\n" /* pad len */
- ".previous\n"
- ".section .altinstr_replacement,\"ax\"\n"
- "4: jmp %l[t_no]\n"
- "5:\n"
- ".previous\n"
- ".section .altinstructions,\"a\"\n"
- " .long 1b - .\n" /* src offset */
- " .long 0\n" /* no replacement */
- " .word %P0\n" /* feature bit */
- " .byte 3b - 1b\n" /* src len */
- " .byte 0\n" /* repl len */
- " .byte 0\n" /* pad len */
- ".previous\n"
- ".section .altinstr_aux,\"ax\"\n"
- "6:\n"
- " testb %[bitnum],%[cap_byte]\n"
- " jnz %l[t_yes]\n"
- " jmp %l[t_no]\n"
- ".previous\n"
- : : "i" (bit), "i" (X86_FEATURE_ALWAYS),
- [bitnum] "i" (1 << (bit & 7)),
- [cap_byte] "m" (((const char *)boot_cpu_data.x86_capability)[bit >> 3])
- : : t_yes, t_no);
- t_yes:
- return true;
- t_no:
- return false;
+ asm_volatile_goto("1: jmp 6f\n"
+ "2:\n"
+ ".skip -(((5f-4f) - (2b-1b)) > 0) * "
+ "((5f-4f) - (2b-1b)),0x90\n"
+ "3:\n"
+ ".section .altinstructions,\"a\"\n"
+ " .long 1b - .\n" /* src offset */
+ " .long 4f - .\n" /* repl offset */
+ " .word %P[always]\n" /* always replace */
+ " .byte 3b - 1b\n" /* src len */
+ " .byte 5f - 4f\n" /* repl len */
+ " .byte 3b - 2b\n" /* pad len */
+ ".previous\n"
+ ".section .altinstr_replacement,\"ax\"\n"
+ "4: jmp %l[t_no]\n"
+ "5:\n"
+ ".previous\n"
+ ".section .altinstructions,\"a\"\n"
+ " .long 1b - .\n" /* src offset */
+ " .long 0\n" /* no replacement */
+ " .word %P[feature]\n" /* feature bit */
+ " .byte 3b - 1b\n" /* src len */
+ " .byte 0\n" /* repl len */
+ " .byte 0\n" /* pad len */
+ ".previous\n"
+ ".section .altinstr_aux,\"ax\"\n"
+ "6:\n"
+ " testb %[bitnum],%[cap_byte]\n"
+ " jnz %l[t_yes]\n"
+ " jmp %l[t_no]\n"
+ ".previous\n"
+ : : [feature] "i" (bit),
+ [always] "i" (X86_FEATURE_ALWAYS),
+ [bitnum] "i" (1 << (bit & 7)),
+ [cap_byte] "m" (((const char *)boot_cpu_data.x86_capability)[bit >> 3])
+ : : t_yes, t_no);
+t_yes:
+ return true;
+t_no:
+ return false;
}
#define static_cpu_has(bit) \
diff --git a/arch/x86/include/asm/page_64.h b/arch/x86/include/asm/page_64.h
index 4baa6bceb232..d652a3808065 100644
--- a/arch/x86/include/asm/page_64.h
+++ b/arch/x86/include/asm/page_64.h
@@ -52,10 +52,6 @@ static inline void clear_page(void *page)
void copy_page(void *to, void *from);
-#ifdef CONFIG_X86_MCE
-#define arch_unmap_kpfn arch_unmap_kpfn
-#endif
-
#endif /* !__ASSEMBLY__ */
#ifdef CONFIG_X86_VSYSCALL_EMULATION
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index 46b675aaf20b..f11910b44638 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -1176,16 +1176,25 @@ static void __init decode_gam_rng_tbl(unsigned long ptr)
uv_gre_table = gre;
for (; gre->type != UV_GAM_RANGE_TYPE_UNUSED; gre++) {
+ unsigned long size = ((unsigned long)(gre->limit - lgre)
+ << UV_GAM_RANGE_SHFT);
+ int order = 0;
+ char suffix[] = " KMGTPE";
+
+ while (size > 9999 && order < sizeof(suffix)) {
+ size /= 1024;
+ order++;
+ }
+
if (!index) {
pr_info("UV: GAM Range Table...\n");
pr_info("UV: # %20s %14s %5s %4s %5s %3s %2s\n", "Range", "", "Size", "Type", "NASID", "SID", "PN");
}
- pr_info("UV: %2d: 0x%014lx-0x%014lx %5luG %3d %04x %02x %02x\n",
+ pr_info("UV: %2d: 0x%014lx-0x%014lx %5lu%c %3d %04x %02x %02x\n",
index++,
(unsigned long)lgre << UV_GAM_RANGE_SHFT,
(unsigned long)gre->limit << UV_GAM_RANGE_SHFT,
- ((unsigned long)(gre->limit - lgre)) >>
- (30 - UV_GAM_RANGE_SHFT), /* 64M -> 1G */
+ size, suffix[order],
gre->type, gre->nasid, gre->sockid, gre->pnode);
lgre = gre->limit;
diff --git a/arch/x86/kernel/cpu/mcheck/mce-internal.h b/arch/x86/kernel/cpu/mcheck/mce-internal.h
index aa0d5df9dc60..e956eb267061 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-internal.h
+++ b/arch/x86/kernel/cpu/mcheck/mce-internal.h
@@ -115,4 +115,19 @@ static inline void mce_unregister_injector_chain(struct notifier_block *nb) { }
extern struct mca_config mca_cfg;
+#ifndef CONFIG_X86_64
+/*
+ * On 32-bit systems it would be difficult to safely unmap a poison page
+ * from the kernel 1:1 map because there are no non-canonical addresses that
+ * we can use to refer to the address without risking a speculative access.
+ * However, this isn't much of an issue because:
+ * 1) Few unmappable pages are in the 1:1 map. Most are in HIGHMEM which
+ * are only mapped into the kernel as needed
+ * 2) Few people would run a 32-bit kernel on a machine that supports
+ * recoverable errors because they have too much memory to boot 32-bit.
+ */
+static inline void mce_unmap_kpfn(unsigned long pfn) {}
+#define mce_unmap_kpfn mce_unmap_kpfn
+#endif
+
#endif /* __X86_MCE_INTERNAL_H__ */
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 3a8e88a611eb..8ff94d1e2dce 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -105,6 +105,10 @@ static struct irq_work mce_irq_work;
static void (*quirk_no_way_out)(int bank, struct mce *m, struct pt_regs *regs);
+#ifndef mce_unmap_kpfn
+static void mce_unmap_kpfn(unsigned long pfn);
+#endif
+
/*
* CPU/chipset specific EDAC code can register a notifier call here to print
* MCE errors in a human-readable form.
@@ -234,7 +238,7 @@ static void __print_mce(struct mce *m)
m->cs, m->ip);
if (m->cs == __KERNEL_CS)
- pr_cont("{%pS}", (void *)m->ip);
+ pr_cont("{%pS}", (void *)(unsigned long)m->ip);
pr_cont("\n");
}
@@ -590,7 +594,8 @@ static int srao_decode_notifier(struct notifier_block *nb, unsigned long val,
if (mce_usable_address(mce) && (mce->severity == MCE_AO_SEVERITY)) {
pfn = mce->addr >> PAGE_SHIFT;
- memory_failure(pfn, 0);
+ if (!memory_failure(pfn, 0))
+ mce_unmap_kpfn(pfn);
}
return NOTIFY_OK;
@@ -1057,12 +1062,13 @@ static int do_memory_failure(struct mce *m)
ret = memory_failure(m->addr >> PAGE_SHIFT, flags);
if (ret)
pr_err("Memory error not recovered");
+ else
+ mce_unmap_kpfn(m->addr >> PAGE_SHIFT);
return ret;
}
-#if defined(arch_unmap_kpfn) && defined(CONFIG_MEMORY_FAILURE)
-
-void arch_unmap_kpfn(unsigned long pfn)
+#ifndef mce_unmap_kpfn
+static void mce_unmap_kpfn(unsigned long pfn)
{
unsigned long decoy_addr;
@@ -1073,7 +1079,7 @@ void arch_unmap_kpfn(unsigned long pfn)
* We would like to just call:
* set_memory_np((unsigned long)pfn_to_kaddr(pfn), 1);
* but doing that would radically increase the odds of a
- * speculative access to the posion page because we'd have
+ * speculative access to the poison page because we'd have
* the virtual address of the kernel 1:1 mapping sitting
* around in registers.
* Instead we get tricky. We create a non-canonical address
@@ -1098,7 +1104,6 @@ void arch_unmap_kpfn(unsigned long pfn)
if (set_memory_np(decoy_addr, 1))
pr_warn("Could not invalidate pfn=0x%lx from 1:1 map\n", pfn);
-
}
#endif
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 6f27facbaa9b..cfc61e1d45e2 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -1430,7 +1430,6 @@ static void remove_siblinginfo(int cpu)
cpumask_clear(cpu_llc_shared_mask(cpu));
cpumask_clear(topology_sibling_cpumask(cpu));
cpumask_clear(topology_core_cpumask(cpu));
- c->phys_proc_id = 0;
c->cpu_core_id = 0;
cpumask_clear_cpu(cpu, cpu_sibling_setup_mask);
recompute_smt_state();
diff --git a/arch/x86/lib/error-inject.c b/arch/x86/lib/error-inject.c
index 7b881d03d0dd..3cdf06128d13 100644
--- a/arch/x86/lib/error-inject.c
+++ b/arch/x86/lib/error-inject.c
@@ -7,6 +7,7 @@ asmlinkage void just_return_func(void);
asm(
".type just_return_func, @function\n"
+ ".globl just_return_func\n"
"just_return_func:\n"
" ret\n"
".size just_return_func, .-just_return_func\n"
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 1ab42c852069..8ba9c3128947 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -1193,8 +1193,8 @@ void __init mem_init(void)
register_page_bootmem_info();
/* Register memory areas for /proc/kcore */
- kclist_add(&kcore_vsyscall, (void *)VSYSCALL_ADDR,
- PAGE_SIZE, KCORE_OTHER);
+ if (get_gate_vma(&init_mm))
+ kclist_add(&kcore_vsyscall, (void *)VSYSCALL_ADDR, PAGE_SIZE, KCORE_USER);
mem_init_print_info(NULL);
}
diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
index e8a93bc8285d..d1e82761de81 100644
--- a/fs/proc/kcore.c
+++ b/fs/proc/kcore.c
@@ -510,6 +510,10 @@ read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos)
/* we have to zero-fill user buffer even if no read */
if (copy_to_user(buffer, buf, tsz))
return -EFAULT;
+ } else if (m->type == KCORE_USER) {
+ /* User page is handled prior to normal kernel page: */
+ if (copy_to_user(buffer, (char *)start, tsz))
+ return -EFAULT;
} else {
if (kern_addr_valid(start)) {
/*
diff --git a/include/linux/kcore.h b/include/linux/kcore.h
index 7ff25a808fef..80db19d3a505 100644
--- a/include/linux/kcore.h
+++ b/include/linux/kcore.h
@@ -10,6 +10,7 @@ enum kcore_type {
KCORE_VMALLOC,
KCORE_RAM,
KCORE_VMEMMAP,
+ KCORE_USER,
KCORE_OTHER,
};
diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h
index c30b32e3c862..10191c28fc04 100644
--- a/include/linux/mm_inline.h
+++ b/include/linux/mm_inline.h
@@ -127,10 +127,4 @@ static __always_inline enum lru_list page_lru(struct page *page)
#define lru_to_page(head) (list_entry((head)->prev, struct page, lru))
-#ifdef arch_unmap_kpfn
-extern void arch_unmap_kpfn(unsigned long pfn);
-#else
-static __always_inline void arch_unmap_kpfn(unsigned long pfn) { }
-#endif
-
#endif
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 4b80ccee4535..8291b75f42c8 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -1139,8 +1139,6 @@ int memory_failure(unsigned long pfn, int flags)
return 0;
}
- arch_unmap_kpfn(pfn);
-
orig_head = hpage = compound_head(p);
num_poisoned_pages_inc();
^ permalink raw reply related [flat|nested] 547+ messages in thread
* Re: [GIT PULL] x86 fixes
2018-01-18 0:24 ` Ingo Molnar
@ 2018-01-18 0:29 ` Andrew Morton
0 siblings, 0 replies; 547+ messages in thread
From: Andrew Morton @ 2018-01-18 0:29 UTC (permalink / raw)
To: Ingo Molnar
Cc: Linus Torvalds, Linux Kernel Mailing List, Thomas Gleixner,
H. Peter Anvin, Peter Zijlstra, Borislav Petkov
On Thu, 18 Jan 2018 01:24:08 +0100 Ingo Molnar <mingo@kernel.org> wrote:
>
> * Linus Torvalds <torvalds@linux-foundation.org> wrote:
>
> > On Wed, Jan 17, 2018 at 7:41 AM, Ingo Molnar <mingo@kernel.org> wrote:
> > >
> > > - A kdump fix
> > >
> > > out-of-topic modifications in x86-urgent-for-linus:
> > > -----------------------------------------------------
> > > include/linux/crash_core.h # 9f15b9120f56: kdump: Write the correct add
> > > kernel/crash_core.c # 9f15b9120f56: kdump: Write the correct add
> >
> > This came through Andrew too. It all merged fine since there were no
> > other modifications, but it's a bit odd how this was in the x86 tree,
> > and even if that part makes sense it's a sign of lack of communication
> > at some point.
> >
> > Oh well. Not a big deal. I just thought I'd mention it.
>
> Hm, I don't think Andrew Cc:-ed me when he sent it to you, so I didn't notice the
> duplication. The bug was introduced via the 5-level paging changes in the x86
> tree, so I assumed the fix would go via the x86 tree as well.
>
> Andrew was Cc:-ed to the -tip commit.
I cc'ed the whole world on that one:
Acked-by: Baoquan He <bhe@redhat.com>
Acked-by: Dave Young <dyoung@redhat.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Dave Young <dyoung@redhat.com>
Cc: Baoquan He <bhe@redhat.com>
Cc: Vivek Goyal <vgoyal@redhat.com>
Cc: <stable@vger.kernel.org>
but yeah, I make a lot of noise ;)
Ordinarily I'll auto-drop such things if they turn up in linux-next but
it seems this one got fast-tracked in -tip so that mechanism didn't
work (this is rare).
^ permalink raw reply [flat|nested] 547+ messages in thread
* Re: [GIT PULL] x86 fixes
2018-01-17 20:35 ` Linus Torvalds
@ 2018-01-18 0:24 ` Ingo Molnar
2018-01-18 0:29 ` Andrew Morton
0 siblings, 1 reply; 547+ messages in thread
From: Ingo Molnar @ 2018-01-18 0:24 UTC (permalink / raw)
To: Linus Torvalds
Cc: Linux Kernel Mailing List, Thomas Gleixner, H. Peter Anvin,
Peter Zijlstra, Borislav Petkov, Andrew Morton
* Linus Torvalds <torvalds@linux-foundation.org> wrote:
> On Wed, Jan 17, 2018 at 7:41 AM, Ingo Molnar <mingo@kernel.org> wrote:
> >
> > - A kdump fix
> >
> > out-of-topic modifications in x86-urgent-for-linus:
> > -----------------------------------------------------
> > include/linux/crash_core.h # 9f15b9120f56: kdump: Write the correct add
> > kernel/crash_core.c # 9f15b9120f56: kdump: Write the correct add
>
> This came through Andrew too. It all merged fine since there were no
> other modifications, but it's a bit odd how this was in the x86 tree,
> and even if that part makes sense it's a sign of lack of communication
> at some point.
>
> Oh well. Not a big deal. I just thought I'd mention it.
Hm, I don't think Andrew Cc:-ed me when he sent it to you, so I didn't notice the
duplication. The bug was introduced via the 5-level paging changes in the x86
tree, so I assumed the fix would go via the x86 tree as well.
Andrew was Cc:-ed to the -tip commit.
Thanks,
Ingo
^ permalink raw reply [flat|nested] 547+ messages in thread
* Re: [GIT PULL] x86 fixes
2018-01-17 15:41 Ingo Molnar
@ 2018-01-17 20:35 ` Linus Torvalds
2018-01-18 0:24 ` Ingo Molnar
0 siblings, 1 reply; 547+ messages in thread
From: Linus Torvalds @ 2018-01-17 20:35 UTC (permalink / raw)
To: Ingo Molnar
Cc: Linux Kernel Mailing List, Thomas Gleixner, H. Peter Anvin,
Peter Zijlstra, Borislav Petkov, Andrew Morton
On Wed, Jan 17, 2018 at 7:41 AM, Ingo Molnar <mingo@kernel.org> wrote:
>
> - A kdump fix
>
> out-of-topic modifications in x86-urgent-for-linus:
> -----------------------------------------------------
> include/linux/crash_core.h # 9f15b9120f56: kdump: Write the correct add
> kernel/crash_core.c # 9f15b9120f56: kdump: Write the correct add
This came through Andrew too. It all merged fine since there were no
other modifications, but it's a bit odd how this was in the x86 tree,
and even if that part makes sense it's a sign of lack of communication
at some point.
Oh well. Not a big deal. I just thought I'd mention it.
Linus
^ permalink raw reply [flat|nested] 547+ messages in thread
* [GIT PULL] x86 fixes
@ 2018-01-17 15:41 Ingo Molnar
2018-01-17 20:35 ` Linus Torvalds
0 siblings, 1 reply; 547+ messages in thread
From: Ingo Molnar @ 2018-01-17 15:41 UTC (permalink / raw)
To: Linus Torvalds
Cc: linux-kernel, Thomas Gleixner, H. Peter Anvin, Peter Zijlstra,
Borislav Petkov, Andrew Morton
Linus,
Please pull the latest x86-urgent-for-linus git tree from:
git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus
# HEAD: 45d55e7bac4028af93f5fa324e69958a0b868e96 x86/apic/vector: Fix off by one in error path
Misc fixes:
- A rather involved set of memory hardware encryption fixes to support the early
loading of microcode files via the initrd. These are larger than what we
normally take at such a late -rc stage, but there are two mitigating factors:
1) much of the changes are limited to the SME code itself 2) being able to
early load microcode has increased importance in the post-Meltdown/Spectre era.
- An IRQ vector allocator fix
- An Intel RDT driver use-after-free fix
- An APIC driver bug fix/revert to make certain older systems boot again
- A pkeys ABI fix
- TSC calibration fixes
- A kdump fix
out-of-topic modifications in x86-urgent-for-linus:
-----------------------------------------------------
include/linux/crash_core.h # 9f15b9120f56: kdump: Write the correct add
kernel/crash_core.c # 9f15b9120f56: kdump: Write the correct add
Thanks,
Ingo
------------------>
Andi Kleen (1):
x86/idt: Mark IDT tables __initconst
Eric W. Biederman (1):
x86/mm/pkeys: Fix fill_sig_info_pkey
Kirill A. Shutemov (1):
kdump: Write the correct address of mem_section into vmcoreinfo
Len Brown (3):
x86/tsc: Future-proof native_calibrate_tsc()
x86/tsc: Fix erroneous TSC rate on Skylake Xeon
x86/tsc: Print tsc_khz, when it differs from cpu_khz
Thomas Gleixner (2):
x86/intel_rdt/cqm: Prevent use after free
x86/apic/vector: Fix off by one in error path
Tom Lendacky (5):
x86/mm: Clean up register saving in the __enc_copy() assembly code
x86/mm: Use a struct to reduce parameters for SME PGD mapping
x86/mm: Centralize PMD flags in sme_encrypt_kernel()
x86/mm: Prepare sme_encrypt_kernel() for PAGE aligned encryption
x86/mm: Encrypt the initrd earlier for BSP microcode update
Ville Syrjälä (1):
Revert "x86/apic: Remove init_bsp_APIC()"
arch/x86/include/asm/apic.h | 1 +
arch/x86/include/asm/mem_encrypt.h | 4 +-
arch/x86/kernel/apic/apic.c | 49 +++++
arch/x86/kernel/apic/vector.c | 7 +-
arch/x86/kernel/cpu/intel_rdt.c | 8 +-
arch/x86/kernel/head64.c | 4 +-
arch/x86/kernel/idt.c | 12 +-
arch/x86/kernel/irqinit.c | 3 +
arch/x86/kernel/setup.c | 10 --
arch/x86/kernel/tsc.c | 9 +-
arch/x86/mm/fault.c | 7 +-
arch/x86/mm/mem_encrypt.c | 356 +++++++++++++++++++++++++++----------
arch/x86/mm/mem_encrypt_boot.S | 80 +++++----
include/linux/crash_core.h | 2 +
kernel/crash_core.c | 2 +-
15 files changed, 391 insertions(+), 163 deletions(-)
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index a9e57f08bfa6..98722773391d 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -136,6 +136,7 @@ extern void disconnect_bsp_APIC(int virt_wire_setup);
extern void disable_local_APIC(void);
extern void lapic_shutdown(void);
extern void sync_Arb_IDs(void);
+extern void init_bsp_APIC(void);
extern void apic_intr_mode_init(void);
extern void setup_local_APIC(void);
extern void init_apic_mappings(void);
diff --git a/arch/x86/include/asm/mem_encrypt.h b/arch/x86/include/asm/mem_encrypt.h
index c9459a4c3c68..22c5f3e6f820 100644
--- a/arch/x86/include/asm/mem_encrypt.h
+++ b/arch/x86/include/asm/mem_encrypt.h
@@ -39,7 +39,7 @@ void __init sme_unmap_bootdata(char *real_mode_data);
void __init sme_early_init(void);
-void __init sme_encrypt_kernel(void);
+void __init sme_encrypt_kernel(struct boot_params *bp);
void __init sme_enable(struct boot_params *bp);
int __init early_set_memory_decrypted(unsigned long vaddr, unsigned long size);
@@ -67,7 +67,7 @@ static inline void __init sme_unmap_bootdata(char *real_mode_data) { }
static inline void __init sme_early_init(void) { }
-static inline void __init sme_encrypt_kernel(void) { }
+static inline void __init sme_encrypt_kernel(struct boot_params *bp) { }
static inline void __init sme_enable(struct boot_params *bp) { }
static inline bool sme_active(void) { return false; }
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 880441f24146..25ddf02598d2 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -1286,6 +1286,55 @@ static int __init apic_intr_mode_select(void)
return APIC_SYMMETRIC_IO;
}
+/*
+ * An initial setup of the virtual wire mode.
+ */
+void __init init_bsp_APIC(void)
+{
+ unsigned int value;
+
+ /*
+ * Don't do the setup now if we have a SMP BIOS as the
+ * through-I/O-APIC virtual wire mode might be active.
+ */
+ if (smp_found_config || !boot_cpu_has(X86_FEATURE_APIC))
+ return;
+
+ /*
+ * Do not trust the local APIC being empty at bootup.
+ */
+ clear_local_APIC();
+
+ /*
+ * Enable APIC.
+ */
+ value = apic_read(APIC_SPIV);
+ value &= ~APIC_VECTOR_MASK;
+ value |= APIC_SPIV_APIC_ENABLED;
+
+#ifdef CONFIG_X86_32
+ /* This bit is reserved on P4/Xeon and should be cleared */
+ if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) &&
+ (boot_cpu_data.x86 == 15))
+ value &= ~APIC_SPIV_FOCUS_DISABLED;
+ else
+#endif
+ value |= APIC_SPIV_FOCUS_DISABLED;
+ value |= SPURIOUS_APIC_VECTOR;
+ apic_write(APIC_SPIV, value);
+
+ /*
+ * Set up the virtual wire mode.
+ */
+ apic_write(APIC_LVT0, APIC_DM_EXTINT);
+ value = APIC_DM_NMI;
+ if (!lapic_is_integrated()) /* 82489DX */
+ value |= APIC_LVT_LEVEL_TRIGGER;
+ if (apic_extnmi == APIC_EXTNMI_NONE)
+ value |= APIC_LVT_MASKED;
+ apic_write(APIC_LVT1, value);
+}
+
/* Init the interrupt delivery mode for the BSP */
void __init apic_intr_mode_init(void)
{
diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index f8b03bb8e725..3cc471beb50b 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -542,14 +542,17 @@ static int x86_vector_alloc_irqs(struct irq_domain *domain, unsigned int virq,
err = assign_irq_vector_policy(irqd, info);
trace_vector_setup(virq + i, false, err);
- if (err)
+ if (err) {
+ irqd->chip_data = NULL;
+ free_apic_chip_data(apicd);
goto error;
+ }
}
return 0;
error:
- x86_vector_free_irqs(domain, virq, i + 1);
+ x86_vector_free_irqs(domain, virq, i);
return err;
}
diff --git a/arch/x86/kernel/cpu/intel_rdt.c b/arch/x86/kernel/cpu/intel_rdt.c
index 88dcf8479013..99442370de40 100644
--- a/arch/x86/kernel/cpu/intel_rdt.c
+++ b/arch/x86/kernel/cpu/intel_rdt.c
@@ -525,10 +525,6 @@ static void domain_remove_cpu(int cpu, struct rdt_resource *r)
*/
if (static_branch_unlikely(&rdt_mon_enable_key))
rmdir_mondata_subdir_allrdtgrp(r, d->id);
- kfree(d->ctrl_val);
- kfree(d->rmid_busy_llc);
- kfree(d->mbm_total);
- kfree(d->mbm_local);
list_del(&d->list);
if (is_mbm_enabled())
cancel_delayed_work(&d->mbm_over);
@@ -545,6 +541,10 @@ static void domain_remove_cpu(int cpu, struct rdt_resource *r)
cancel_delayed_work(&d->cqm_limbo);
}
+ kfree(d->ctrl_val);
+ kfree(d->rmid_busy_llc);
+ kfree(d->mbm_total);
+ kfree(d->mbm_local);
kfree(d);
return;
}
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 6a5d757b9cfd..7ba5d819ebe3 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -157,8 +157,8 @@ unsigned long __head __startup_64(unsigned long physaddr,
p = fixup_pointer(&phys_base, physaddr);
*p += load_delta - sme_get_me_mask();
- /* Encrypt the kernel (if SME is active) */
- sme_encrypt_kernel();
+ /* Encrypt the kernel and related (if SME is active) */
+ sme_encrypt_kernel(bp);
/*
* Return the SME encryption mask (if SME is active) to be used as a
diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c
index d985cef3984f..56d99be3706a 100644
--- a/arch/x86/kernel/idt.c
+++ b/arch/x86/kernel/idt.c
@@ -56,7 +56,7 @@ struct idt_data {
* Early traps running on the DEFAULT_STACK because the other interrupt
* stacks work only after cpu_init().
*/
-static const __initdata struct idt_data early_idts[] = {
+static const __initconst struct idt_data early_idts[] = {
INTG(X86_TRAP_DB, debug),
SYSG(X86_TRAP_BP, int3),
#ifdef CONFIG_X86_32
@@ -70,7 +70,7 @@ static const __initdata struct idt_data early_idts[] = {
* the traps which use them are reinitialized with IST after cpu_init() has
* set up TSS.
*/
-static const __initdata struct idt_data def_idts[] = {
+static const __initconst struct idt_data def_idts[] = {
INTG(X86_TRAP_DE, divide_error),
INTG(X86_TRAP_NMI, nmi),
INTG(X86_TRAP_BR, bounds),
@@ -108,7 +108,7 @@ static const __initdata struct idt_data def_idts[] = {
/*
* The APIC and SMP idt entries
*/
-static const __initdata struct idt_data apic_idts[] = {
+static const __initconst struct idt_data apic_idts[] = {
#ifdef CONFIG_SMP
INTG(RESCHEDULE_VECTOR, reschedule_interrupt),
INTG(CALL_FUNCTION_VECTOR, call_function_interrupt),
@@ -150,7 +150,7 @@ static const __initdata struct idt_data apic_idts[] = {
* Early traps running on the DEFAULT_STACK because the other interrupt
* stacks work only after cpu_init().
*/
-static const __initdata struct idt_data early_pf_idts[] = {
+static const __initconst struct idt_data early_pf_idts[] = {
INTG(X86_TRAP_PF, page_fault),
};
@@ -158,7 +158,7 @@ static const __initdata struct idt_data early_pf_idts[] = {
* Override for the debug_idt. Same as the default, but with interrupt
* stack set to DEFAULT_STACK (0). Required for NMI trap handling.
*/
-static const __initdata struct idt_data dbg_idts[] = {
+static const __initconst struct idt_data dbg_idts[] = {
INTG(X86_TRAP_DB, debug),
INTG(X86_TRAP_BP, int3),
};
@@ -180,7 +180,7 @@ gate_desc debug_idt_table[IDT_ENTRIES] __page_aligned_bss;
* The exceptions which use Interrupt stacks. They are setup after
* cpu_init() when the TSS has been initialized.
*/
-static const __initdata struct idt_data ist_idts[] = {
+static const __initconst struct idt_data ist_idts[] = {
ISTG(X86_TRAP_DB, debug, DEBUG_STACK),
ISTG(X86_TRAP_NMI, nmi, NMI_STACK),
SISTG(X86_TRAP_BP, int3, DEBUG_STACK),
diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c
index 8da3e909e967..a539410c4ea9 100644
--- a/arch/x86/kernel/irqinit.c
+++ b/arch/x86/kernel/irqinit.c
@@ -61,6 +61,9 @@ void __init init_ISA_irqs(void)
struct irq_chip *chip = legacy_pic->chip;
int i;
+#if defined(CONFIG_X86_64) || defined(CONFIG_X86_LOCAL_APIC)
+ init_bsp_APIC();
+#endif
legacy_pic->init(0);
for (i = 0; i < nr_legacy_irqs(); i++)
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 145810b0edf6..68d7ab81c62f 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -364,16 +364,6 @@ static void __init reserve_initrd(void)
!ramdisk_image || !ramdisk_size)
return; /* No initrd provided by bootloader */
- /*
- * If SME is active, this memory will be marked encrypted by the
- * kernel when it is accessed (including relocation). However, the
- * ramdisk image was loaded decrypted by the bootloader, so make
- * sure that it is encrypted before accessing it. For SEV the
- * ramdisk will already be encrypted, so only do this for SME.
- */
- if (sme_active())
- sme_early_encrypt(ramdisk_image, ramdisk_end - ramdisk_image);
-
initrd_start = 0;
mapped_size = memblock_mem_size(max_pfn_mapped);
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 8ea117f8142e..e169e85db434 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -602,7 +602,6 @@ unsigned long native_calibrate_tsc(void)
case INTEL_FAM6_KABYLAKE_DESKTOP:
crystal_khz = 24000; /* 24.0 MHz */
break;
- case INTEL_FAM6_SKYLAKE_X:
case INTEL_FAM6_ATOM_DENVERTON:
crystal_khz = 25000; /* 25.0 MHz */
break;
@@ -612,6 +611,8 @@ unsigned long native_calibrate_tsc(void)
}
}
+ if (crystal_khz == 0)
+ return 0;
/*
* TSC frequency determined by CPUID is a "hardware reported"
* frequency and is the most accurate one so far we have. This
@@ -1315,6 +1316,12 @@ void __init tsc_init(void)
(unsigned long)cpu_khz / 1000,
(unsigned long)cpu_khz % 1000);
+ if (cpu_khz != tsc_khz) {
+ pr_info("Detected %lu.%03lu MHz TSC",
+ (unsigned long)tsc_khz / 1000,
+ (unsigned long)tsc_khz % 1000);
+ }
+
/* Sanitize TSC ADJUST before cyc2ns gets initialized */
tsc_store_and_check_tsc_adjust(true);
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 06fe3d51d385..b3e40773dce0 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -172,14 +172,15 @@ is_prefetch(struct pt_regs *regs, unsigned long error_code, unsigned long addr)
* 6. T1 : reaches here, sees vma_pkey(vma)=5, when we really
* faulted on a pte with its pkey=4.
*/
-static void fill_sig_info_pkey(int si_code, siginfo_t *info, u32 *pkey)
+static void fill_sig_info_pkey(int si_signo, int si_code, siginfo_t *info,
+ u32 *pkey)
{
/* This is effectively an #ifdef */
if (!boot_cpu_has(X86_FEATURE_OSPKE))
return;
/* Fault not from Protection Keys: nothing to do */
- if (si_code != SEGV_PKUERR)
+ if ((si_code != SEGV_PKUERR) || (si_signo != SIGSEGV))
return;
/*
* force_sig_info_fault() is called from a number of
@@ -218,7 +219,7 @@ force_sig_info_fault(int si_signo, int si_code, unsigned long address,
lsb = PAGE_SHIFT;
info.si_addr_lsb = lsb;
- fill_sig_info_pkey(si_code, &info, pkey);
+ fill_sig_info_pkey(si_signo, si_code, &info, pkey);
force_sig_info(si_signo, &info, tsk);
}
diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c
index 391b13402e40..3ef362f598e3 100644
--- a/arch/x86/mm/mem_encrypt.c
+++ b/arch/x86/mm/mem_encrypt.c
@@ -464,37 +464,62 @@ void swiotlb_set_mem_attributes(void *vaddr, unsigned long size)
set_memory_decrypted((unsigned long)vaddr, size >> PAGE_SHIFT);
}
-static void __init sme_clear_pgd(pgd_t *pgd_base, unsigned long start,
- unsigned long end)
+struct sme_populate_pgd_data {
+ void *pgtable_area;
+ pgd_t *pgd;
+
+ pmdval_t pmd_flags;
+ pteval_t pte_flags;
+ unsigned long paddr;
+
+ unsigned long vaddr;
+ unsigned long vaddr_end;
+};
+
+static void __init sme_clear_pgd(struct sme_populate_pgd_data *ppd)
{
unsigned long pgd_start, pgd_end, pgd_size;
pgd_t *pgd_p;
- pgd_start = start & PGDIR_MASK;
- pgd_end = end & PGDIR_MASK;
+ pgd_start = ppd->vaddr & PGDIR_MASK;
+ pgd_end = ppd->vaddr_end & PGDIR_MASK;
- pgd_size = (((pgd_end - pgd_start) / PGDIR_SIZE) + 1);
- pgd_size *= sizeof(pgd_t);
+ pgd_size = (((pgd_end - pgd_start) / PGDIR_SIZE) + 1) * sizeof(pgd_t);
- pgd_p = pgd_base + pgd_index(start);
+ pgd_p = ppd->pgd + pgd_index(ppd->vaddr);
memset(pgd_p, 0, pgd_size);
}
-#define PGD_FLAGS _KERNPG_TABLE_NOENC
-#define P4D_FLAGS _KERNPG_TABLE_NOENC
-#define PUD_FLAGS _KERNPG_TABLE_NOENC
-#define PMD_FLAGS (__PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL)
+#define PGD_FLAGS _KERNPG_TABLE_NOENC
+#define P4D_FLAGS _KERNPG_TABLE_NOENC
+#define PUD_FLAGS _KERNPG_TABLE_NOENC
+#define PMD_FLAGS _KERNPG_TABLE_NOENC
+
+#define PMD_FLAGS_LARGE (__PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL)
+
+#define PMD_FLAGS_DEC PMD_FLAGS_LARGE
+#define PMD_FLAGS_DEC_WP ((PMD_FLAGS_DEC & ~_PAGE_CACHE_MASK) | \
+ (_PAGE_PAT | _PAGE_PWT))
+
+#define PMD_FLAGS_ENC (PMD_FLAGS_LARGE | _PAGE_ENC)
+
+#define PTE_FLAGS (__PAGE_KERNEL_EXEC & ~_PAGE_GLOBAL)
+
+#define PTE_FLAGS_DEC PTE_FLAGS
+#define PTE_FLAGS_DEC_WP ((PTE_FLAGS_DEC & ~_PAGE_CACHE_MASK) | \
+ (_PAGE_PAT | _PAGE_PWT))
+
+#define PTE_FLAGS_ENC (PTE_FLAGS | _PAGE_ENC)
-static void __init *sme_populate_pgd(pgd_t *pgd_base, void *pgtable_area,
- unsigned long vaddr, pmdval_t pmd_val)
+static pmd_t __init *sme_prepare_pgd(struct sme_populate_pgd_data *ppd)
{
pgd_t *pgd_p;
p4d_t *p4d_p;
pud_t *pud_p;
pmd_t *pmd_p;
- pgd_p = pgd_base + pgd_index(vaddr);
+ pgd_p = ppd->pgd + pgd_index(ppd->vaddr);
if (native_pgd_val(*pgd_p)) {
if (IS_ENABLED(CONFIG_X86_5LEVEL))
p4d_p = (p4d_t *)(native_pgd_val(*pgd_p) & ~PTE_FLAGS_MASK);
@@ -504,15 +529,15 @@ static void __init *sme_populate_pgd(pgd_t *pgd_base, void *pgtable_area,
pgd_t pgd;
if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
- p4d_p = pgtable_area;
+ p4d_p = ppd->pgtable_area;
memset(p4d_p, 0, sizeof(*p4d_p) * PTRS_PER_P4D);
- pgtable_area += sizeof(*p4d_p) * PTRS_PER_P4D;
+ ppd->pgtable_area += sizeof(*p4d_p) * PTRS_PER_P4D;
pgd = native_make_pgd((pgdval_t)p4d_p + PGD_FLAGS);
} else {
- pud_p = pgtable_area;
+ pud_p = ppd->pgtable_area;
memset(pud_p, 0, sizeof(*pud_p) * PTRS_PER_PUD);
- pgtable_area += sizeof(*pud_p) * PTRS_PER_PUD;
+ ppd->pgtable_area += sizeof(*pud_p) * PTRS_PER_PUD;
pgd = native_make_pgd((pgdval_t)pud_p + PGD_FLAGS);
}
@@ -520,58 +545,160 @@ static void __init *sme_populate_pgd(pgd_t *pgd_base, void *pgtable_area,
}
if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
- p4d_p += p4d_index(vaddr);
+ p4d_p += p4d_index(ppd->vaddr);
if (native_p4d_val(*p4d_p)) {
pud_p = (pud_t *)(native_p4d_val(*p4d_p) & ~PTE_FLAGS_MASK);
} else {
p4d_t p4d;
- pud_p = pgtable_area;
+ pud_p = ppd->pgtable_area;
memset(pud_p, 0, sizeof(*pud_p) * PTRS_PER_PUD);
- pgtable_area += sizeof(*pud_p) * PTRS_PER_PUD;
+ ppd->pgtable_area += sizeof(*pud_p) * PTRS_PER_PUD;
p4d = native_make_p4d((pudval_t)pud_p + P4D_FLAGS);
native_set_p4d(p4d_p, p4d);
}
}
- pud_p += pud_index(vaddr);
+ pud_p += pud_index(ppd->vaddr);
if (native_pud_val(*pud_p)) {
if (native_pud_val(*pud_p) & _PAGE_PSE)
- goto out;
+ return NULL;
pmd_p = (pmd_t *)(native_pud_val(*pud_p) & ~PTE_FLAGS_MASK);
} else {
pud_t pud;
- pmd_p = pgtable_area;
+ pmd_p = ppd->pgtable_area;
memset(pmd_p, 0, sizeof(*pmd_p) * PTRS_PER_PMD);
- pgtable_area += sizeof(*pmd_p) * PTRS_PER_PMD;
+ ppd->pgtable_area += sizeof(*pmd_p) * PTRS_PER_PMD;
pud = native_make_pud((pmdval_t)pmd_p + PUD_FLAGS);
native_set_pud(pud_p, pud);
}
- pmd_p += pmd_index(vaddr);
+ return pmd_p;
+}
+
+static void __init sme_populate_pgd_large(struct sme_populate_pgd_data *ppd)
+{
+ pmd_t *pmd_p;
+
+ pmd_p = sme_prepare_pgd(ppd);
+ if (!pmd_p)
+ return;
+
+ pmd_p += pmd_index(ppd->vaddr);
if (!native_pmd_val(*pmd_p) || !(native_pmd_val(*pmd_p) & _PAGE_PSE))
- native_set_pmd(pmd_p, native_make_pmd(pmd_val));
+ native_set_pmd(pmd_p, native_make_pmd(ppd->paddr | ppd->pmd_flags));
+}
-out:
- return pgtable_area;
+static void __init sme_populate_pgd(struct sme_populate_pgd_data *ppd)
+{
+ pmd_t *pmd_p;
+ pte_t *pte_p;
+
+ pmd_p = sme_prepare_pgd(ppd);
+ if (!pmd_p)
+ return;
+
+ pmd_p += pmd_index(ppd->vaddr);
+ if (native_pmd_val(*pmd_p)) {
+ if (native_pmd_val(*pmd_p) & _PAGE_PSE)
+ return;
+
+ pte_p = (pte_t *)(native_pmd_val(*pmd_p) & ~PTE_FLAGS_MASK);
+ } else {
+ pmd_t pmd;
+
+ pte_p = ppd->pgtable_area;
+ memset(pte_p, 0, sizeof(*pte_p) * PTRS_PER_PTE);
+ ppd->pgtable_area += sizeof(*pte_p) * PTRS_PER_PTE;
+
+ pmd = native_make_pmd((pteval_t)pte_p + PMD_FLAGS);
+ native_set_pmd(pmd_p, pmd);
+ }
+
+ pte_p += pte_index(ppd->vaddr);
+ if (!native_pte_val(*pte_p))
+ native_set_pte(pte_p, native_make_pte(ppd->paddr | ppd->pte_flags));
+}
+
+static void __init __sme_map_range_pmd(struct sme_populate_pgd_data *ppd)
+{
+ while (ppd->vaddr < ppd->vaddr_end) {
+ sme_populate_pgd_large(ppd);
+
+ ppd->vaddr += PMD_PAGE_SIZE;
+ ppd->paddr += PMD_PAGE_SIZE;
+ }
+}
+
+static void __init __sme_map_range_pte(struct sme_populate_pgd_data *ppd)
+{
+ while (ppd->vaddr < ppd->vaddr_end) {
+ sme_populate_pgd(ppd);
+
+ ppd->vaddr += PAGE_SIZE;
+ ppd->paddr += PAGE_SIZE;
+ }
+}
+
+static void __init __sme_map_range(struct sme_populate_pgd_data *ppd,
+ pmdval_t pmd_flags, pteval_t pte_flags)
+{
+ unsigned long vaddr_end;
+
+ ppd->pmd_flags = pmd_flags;
+ ppd->pte_flags = pte_flags;
+
+ /* Save original end value since we modify the struct value */
+ vaddr_end = ppd->vaddr_end;
+
+ /* If start is not 2MB aligned, create PTE entries */
+ ppd->vaddr_end = ALIGN(ppd->vaddr, PMD_PAGE_SIZE);
+ __sme_map_range_pte(ppd);
+
+ /* Create PMD entries */
+ ppd->vaddr_end = vaddr_end & PMD_PAGE_MASK;
+ __sme_map_range_pmd(ppd);
+
+ /* If end is not 2MB aligned, create PTE entries */
+ ppd->vaddr_end = vaddr_end;
+ __sme_map_range_pte(ppd);
+}
+
+static void __init sme_map_range_encrypted(struct sme_populate_pgd_data *ppd)
+{
+ __sme_map_range(ppd, PMD_FLAGS_ENC, PTE_FLAGS_ENC);
+}
+
+static void __init sme_map_range_decrypted(struct sme_populate_pgd_data *ppd)
+{
+ __sme_map_range(ppd, PMD_FLAGS_DEC, PTE_FLAGS_DEC);
+}
+
+static void __init sme_map_range_decrypted_wp(struct sme_populate_pgd_data *ppd)
+{
+ __sme_map_range(ppd, PMD_FLAGS_DEC_WP, PTE_FLAGS_DEC_WP);
}
static unsigned long __init sme_pgtable_calc(unsigned long len)
{
- unsigned long p4d_size, pud_size, pmd_size;
+ unsigned long p4d_size, pud_size, pmd_size, pte_size;
unsigned long total;
/*
* Perform a relatively simplistic calculation of the pagetable
- * entries that are needed. That mappings will be covered by 2MB
- * PMD entries so we can conservatively calculate the required
+ * entries that are needed. Those mappings will be covered mostly
+ * by 2MB PMD entries so we can conservatively calculate the required
* number of P4D, PUD and PMD structures needed to perform the
- * mappings. Incrementing the count for each covers the case where
- * the addresses cross entries.
+ * mappings. For mappings that are not 2MB aligned, PTE mappings
+ * would be needed for the start and end portion of the address range
+ * that fall outside of the 2MB alignment. This results in, at most,
+ * two extra pages to hold PTE entries for each range that is mapped.
+ * Incrementing the count for each covers the case where the addresses
+ * cross entries.
*/
if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
p4d_size = (ALIGN(len, PGDIR_SIZE) / PGDIR_SIZE) + 1;
@@ -585,8 +712,9 @@ static unsigned long __init sme_pgtable_calc(unsigned long len)
}
pmd_size = (ALIGN(len, PUD_SIZE) / PUD_SIZE) + 1;
pmd_size *= sizeof(pmd_t) * PTRS_PER_PMD;
+ pte_size = 2 * sizeof(pte_t) * PTRS_PER_PTE;
- total = p4d_size + pud_size + pmd_size;
+ total = p4d_size + pud_size + pmd_size + pte_size;
/*
* Now calculate the added pagetable structures needed to populate
@@ -610,29 +738,29 @@ static unsigned long __init sme_pgtable_calc(unsigned long len)
return total;
}
-void __init sme_encrypt_kernel(void)
+void __init sme_encrypt_kernel(struct boot_params *bp)
{
unsigned long workarea_start, workarea_end, workarea_len;
unsigned long execute_start, execute_end, execute_len;
unsigned long kernel_start, kernel_end, kernel_len;
+ unsigned long initrd_start, initrd_end, initrd_len;
+ struct sme_populate_pgd_data ppd;
unsigned long pgtable_area_len;
- unsigned long paddr, pmd_flags;
unsigned long decrypted_base;
- void *pgtable_area;
- pgd_t *pgd;
if (!sme_active())
return;
/*
- * Prepare for encrypting the kernel by building new pagetables with
- * the necessary attributes needed to encrypt the kernel in place.
+ * Prepare for encrypting the kernel and initrd by building new
+ * pagetables with the necessary attributes needed to encrypt the
+ * kernel in place.
*
* One range of virtual addresses will map the memory occupied
- * by the kernel as encrypted.
+ * by the kernel and initrd as encrypted.
*
* Another range of virtual addresses will map the memory occupied
- * by the kernel as decrypted and write-protected.
+ * by the kernel and initrd as decrypted and write-protected.
*
* The use of write-protect attribute will prevent any of the
* memory from being cached.
@@ -643,6 +771,20 @@ void __init sme_encrypt_kernel(void)
kernel_end = ALIGN(__pa_symbol(_end), PMD_PAGE_SIZE);
kernel_len = kernel_end - kernel_start;
+ initrd_start = 0;
+ initrd_end = 0;
+ initrd_len = 0;
+#ifdef CONFIG_BLK_DEV_INITRD
+ initrd_len = (unsigned long)bp->hdr.ramdisk_size |
+ ((unsigned long)bp->ext_ramdisk_size << 32);
+ if (initrd_len) {
+ initrd_start = (unsigned long)bp->hdr.ramdisk_image |
+ ((unsigned long)bp->ext_ramdisk_image << 32);
+ initrd_end = PAGE_ALIGN(initrd_start + initrd_len);
+ initrd_len = initrd_end - initrd_start;
+ }
+#endif
+
/* Set the encryption workarea to be immediately after the kernel */
workarea_start = kernel_end;
@@ -665,16 +807,21 @@ void __init sme_encrypt_kernel(void)
*/
pgtable_area_len = sizeof(pgd_t) * PTRS_PER_PGD;
pgtable_area_len += sme_pgtable_calc(execute_end - kernel_start) * 2;
+ if (initrd_len)
+ pgtable_area_len += sme_pgtable_calc(initrd_len) * 2;
/* PUDs and PMDs needed in the current pagetables for the workarea */
pgtable_area_len += sme_pgtable_calc(execute_len + pgtable_area_len);
/*
* The total workarea includes the executable encryption area and
- * the pagetable area.
+ * the pagetable area. The start of the workarea is already 2MB
+ * aligned, align the end of the workarea on a 2MB boundary so that
+ * we don't try to create/allocate PTE entries from the workarea
+ * before it is mapped.
*/
workarea_len = execute_len + pgtable_area_len;
- workarea_end = workarea_start + workarea_len;
+ workarea_end = ALIGN(workarea_start + workarea_len, PMD_PAGE_SIZE);
/*
* Set the address to the start of where newly created pagetable
@@ -683,45 +830,30 @@ void __init sme_encrypt_kernel(void)
* pagetables and when the new encrypted and decrypted kernel
* mappings are populated.
*/
- pgtable_area = (void *)execute_end;
+ ppd.pgtable_area = (void *)execute_end;
/*
* Make sure the current pagetable structure has entries for
* addressing the workarea.
*/
- pgd = (pgd_t *)native_read_cr3_pa();
- paddr = workarea_start;
- while (paddr < workarea_end) {
- pgtable_area = sme_populate_pgd(pgd, pgtable_area,
- paddr,
- paddr + PMD_FLAGS);
-
- paddr += PMD_PAGE_SIZE;
- }
+ ppd.pgd = (pgd_t *)native_read_cr3_pa();
+ ppd.paddr = workarea_start;
+ ppd.vaddr = workarea_start;
+ ppd.vaddr_end = workarea_end;
+ sme_map_range_decrypted(&ppd);
/* Flush the TLB - no globals so cr3 is enough */
native_write_cr3(__native_read_cr3());
/*
* A new pagetable structure is being built to allow for the kernel
- * to be encrypted. It starts with an empty PGD that will then be
- * populated with new PUDs and PMDs as the encrypted and decrypted
- * kernel mappings are created.
+ * and initrd to be encrypted. It starts with an empty PGD that will
+ * then be populated with new PUDs and PMDs as the encrypted and
+ * decrypted kernel mappings are created.
*/
- pgd = pgtable_area;
- memset(pgd, 0, sizeof(*pgd) * PTRS_PER_PGD);
- pgtable_area += sizeof(*pgd) * PTRS_PER_PGD;
-
- /* Add encrypted kernel (identity) mappings */
- pmd_flags = PMD_FLAGS | _PAGE_ENC;
- paddr = kernel_start;
- while (paddr < kernel_end) {
- pgtable_area = sme_populate_pgd(pgd, pgtable_area,
- paddr,
- paddr + pmd_flags);
-
- paddr += PMD_PAGE_SIZE;
- }
+ ppd.pgd = ppd.pgtable_area;
+ memset(ppd.pgd, 0, sizeof(pgd_t) * PTRS_PER_PGD);
+ ppd.pgtable_area += sizeof(pgd_t) * PTRS_PER_PGD;
/*
* A different PGD index/entry must be used to get different
@@ -730,47 +862,79 @@ void __init sme_encrypt_kernel(void)
* the base of the mapping.
*/
decrypted_base = (pgd_index(workarea_end) + 1) & (PTRS_PER_PGD - 1);
+ if (initrd_len) {
+ unsigned long check_base;
+
+ check_base = (pgd_index(initrd_end) + 1) & (PTRS_PER_PGD - 1);
+ decrypted_base = max(decrypted_base, check_base);
+ }
decrypted_base <<= PGDIR_SHIFT;
+ /* Add encrypted kernel (identity) mappings */
+ ppd.paddr = kernel_start;
+ ppd.vaddr = kernel_start;
+ ppd.vaddr_end = kernel_end;
+ sme_map_range_encrypted(&ppd);
+
/* Add decrypted, write-protected kernel (non-identity) mappings */
- pmd_flags = (PMD_FLAGS & ~_PAGE_CACHE_MASK) | (_PAGE_PAT | _PAGE_PWT);
- paddr = kernel_start;
- while (paddr < kernel_end) {
- pgtable_area = sme_populate_pgd(pgd, pgtable_area,
- paddr + decrypted_base,
- paddr + pmd_flags);
-
- paddr += PMD_PAGE_SIZE;
+ ppd.paddr = kernel_start;
+ ppd.vaddr = kernel_start + decrypted_base;
+ ppd.vaddr_end = kernel_end + decrypted_base;
+ sme_map_range_decrypted_wp(&ppd);
+
+ if (initrd_len) {
+ /* Add encrypted initrd (identity) mappings */
+ ppd.paddr = initrd_start;
+ ppd.vaddr = initrd_start;
+ ppd.vaddr_end = initrd_end;
+ sme_map_range_encrypted(&ppd);
+ /*
+ * Add decrypted, write-protected initrd (non-identity) mappings
+ */
+ ppd.paddr = initrd_start;
+ ppd.vaddr = initrd_start + decrypted_base;
+ ppd.vaddr_end = initrd_end + decrypted_base;
+ sme_map_range_decrypted_wp(&ppd);
}
/* Add decrypted workarea mappings to both kernel mappings */
- paddr = workarea_start;
- while (paddr < workarea_end) {
- pgtable_area = sme_populate_pgd(pgd, pgtable_area,
- paddr,
- paddr + PMD_FLAGS);
+ ppd.paddr = workarea_start;
+ ppd.vaddr = workarea_start;
+ ppd.vaddr_end = workarea_end;
+ sme_map_range_decrypted(&ppd);
- pgtable_area = sme_populate_pgd(pgd, pgtable_area,
- paddr + decrypted_base,
- paddr + PMD_FLAGS);
-
- paddr += PMD_PAGE_SIZE;
- }
+ ppd.paddr = workarea_start;
+ ppd.vaddr = workarea_start + decrypted_base;
+ ppd.vaddr_end = workarea_end + decrypted_base;
+ sme_map_range_decrypted(&ppd);
/* Perform the encryption */
sme_encrypt_execute(kernel_start, kernel_start + decrypted_base,
- kernel_len, workarea_start, (unsigned long)pgd);
+ kernel_len, workarea_start, (unsigned long)ppd.pgd);
+
+ if (initrd_len)
+ sme_encrypt_execute(initrd_start, initrd_start + decrypted_base,
+ initrd_len, workarea_start,
+ (unsigned long)ppd.pgd);
/*
* At this point we are running encrypted. Remove the mappings for
* the decrypted areas - all that is needed for this is to remove
* the PGD entry/entries.
*/
- sme_clear_pgd(pgd, kernel_start + decrypted_base,
- kernel_end + decrypted_base);
+ ppd.vaddr = kernel_start + decrypted_base;
+ ppd.vaddr_end = kernel_end + decrypted_base;
+ sme_clear_pgd(&ppd);
+
+ if (initrd_len) {
+ ppd.vaddr = initrd_start + decrypted_base;
+ ppd.vaddr_end = initrd_end + decrypted_base;
+ sme_clear_pgd(&ppd);
+ }
- sme_clear_pgd(pgd, workarea_start + decrypted_base,
- workarea_end + decrypted_base);
+ ppd.vaddr = workarea_start + decrypted_base;
+ ppd.vaddr_end = workarea_end + decrypted_base;
+ sme_clear_pgd(&ppd);
/* Flush the TLB - no globals so cr3 is enough */
native_write_cr3(__native_read_cr3());
diff --git a/arch/x86/mm/mem_encrypt_boot.S b/arch/x86/mm/mem_encrypt_boot.S
index 730e6d541df1..01f682cf77a8 100644
--- a/arch/x86/mm/mem_encrypt_boot.S
+++ b/arch/x86/mm/mem_encrypt_boot.S
@@ -22,9 +22,9 @@ ENTRY(sme_encrypt_execute)
/*
* Entry parameters:
- * RDI - virtual address for the encrypted kernel mapping
- * RSI - virtual address for the decrypted kernel mapping
- * RDX - length of kernel
+ * RDI - virtual address for the encrypted mapping
+ * RSI - virtual address for the decrypted mapping
+ * RDX - length to encrypt
* RCX - virtual address of the encryption workarea, including:
* - stack page (PAGE_SIZE)
* - encryption routine page (PAGE_SIZE)
@@ -41,9 +41,9 @@ ENTRY(sme_encrypt_execute)
addq $PAGE_SIZE, %rax /* Workarea encryption routine */
push %r12
- movq %rdi, %r10 /* Encrypted kernel */
- movq %rsi, %r11 /* Decrypted kernel */
- movq %rdx, %r12 /* Kernel length */
+ movq %rdi, %r10 /* Encrypted area */
+ movq %rsi, %r11 /* Decrypted area */
+ movq %rdx, %r12 /* Area length */
/* Copy encryption routine into the workarea */
movq %rax, %rdi /* Workarea encryption routine */
@@ -52,10 +52,10 @@ ENTRY(sme_encrypt_execute)
rep movsb
/* Setup registers for call */
- movq %r10, %rdi /* Encrypted kernel */
- movq %r11, %rsi /* Decrypted kernel */
+ movq %r10, %rdi /* Encrypted area */
+ movq %r11, %rsi /* Decrypted area */
movq %r8, %rdx /* Pagetables used for encryption */
- movq %r12, %rcx /* Kernel length */
+ movq %r12, %rcx /* Area length */
movq %rax, %r8 /* Workarea encryption routine */
addq $PAGE_SIZE, %r8 /* Workarea intermediate copy buffer */
@@ -71,7 +71,7 @@ ENDPROC(sme_encrypt_execute)
ENTRY(__enc_copy)
/*
- * Routine used to encrypt kernel.
+ * Routine used to encrypt memory in place.
* This routine must be run outside of the kernel proper since
* the kernel will be encrypted during the process. So this
* routine is defined here and then copied to an area outside
@@ -79,19 +79,19 @@ ENTRY(__enc_copy)
* during execution.
*
* On entry the registers must be:
- * RDI - virtual address for the encrypted kernel mapping
- * RSI - virtual address for the decrypted kernel mapping
+ * RDI - virtual address for the encrypted mapping
+ * RSI - virtual address for the decrypted mapping
* RDX - address of the pagetables to use for encryption
- * RCX - length of kernel
+ * RCX - length of area
* R8 - intermediate copy buffer
*
* RAX - points to this routine
*
- * The kernel will be encrypted by copying from the non-encrypted
- * kernel space to an intermediate buffer and then copying from the
- * intermediate buffer back to the encrypted kernel space. The physical
- * addresses of the two kernel space mappings are the same which
- * results in the kernel being encrypted "in place".
+ * The area will be encrypted by copying from the non-encrypted
+ * memory space to an intermediate buffer and then copying from the
+ * intermediate buffer back to the encrypted memory space. The physical
+ * addresses of the two mappings are the same which results in the area
+ * being encrypted "in place".
*/
/* Enable the new page tables */
mov %rdx, %cr3
@@ -103,47 +103,55 @@ ENTRY(__enc_copy)
orq $X86_CR4_PGE, %rdx
mov %rdx, %cr4
+ push %r15
+ push %r12
+
+ movq %rcx, %r9 /* Save area length */
+ movq %rdi, %r10 /* Save encrypted area address */
+ movq %rsi, %r11 /* Save decrypted area address */
+
/* Set the PAT register PA5 entry to write-protect */
- push %rcx
movl $MSR_IA32_CR_PAT, %ecx
rdmsr
- push %rdx /* Save original PAT value */
+ mov %rdx, %r15 /* Save original PAT value */
andl $0xffff00ff, %edx /* Clear PA5 */
orl $0x00000500, %edx /* Set PA5 to WP */
wrmsr
- pop %rdx /* RDX contains original PAT value */
- pop %rcx
-
- movq %rcx, %r9 /* Save kernel length */
- movq %rdi, %r10 /* Save encrypted kernel address */
- movq %rsi, %r11 /* Save decrypted kernel address */
wbinvd /* Invalidate any cache entries */
- /* Copy/encrypt 2MB at a time */
+ /* Copy/encrypt up to 2MB at a time */
+ movq $PMD_PAGE_SIZE, %r12
1:
- movq %r11, %rsi /* Source - decrypted kernel */
+ cmpq %r12, %r9
+ jnb 2f
+ movq %r9, %r12
+
+2:
+ movq %r11, %rsi /* Source - decrypted area */
movq %r8, %rdi /* Dest - intermediate copy buffer */
- movq $PMD_PAGE_SIZE, %rcx /* 2MB length */
+ movq %r12, %rcx
rep movsb
movq %r8, %rsi /* Source - intermediate copy buffer */
- movq %r10, %rdi /* Dest - encrypted kernel */
- movq $PMD_PAGE_SIZE, %rcx /* 2MB length */
+ movq %r10, %rdi /* Dest - encrypted area */
+ movq %r12, %rcx
rep movsb
- addq $PMD_PAGE_SIZE, %r11
- addq $PMD_PAGE_SIZE, %r10
- subq $PMD_PAGE_SIZE, %r9 /* Kernel length decrement */
+ addq %r12, %r11
+ addq %r12, %r10
+ subq %r12, %r9 /* Kernel length decrement */
jnz 1b /* Kernel length not zero? */
/* Restore PAT register */
- push %rdx /* Save original PAT value */
movl $MSR_IA32_CR_PAT, %ecx
rdmsr
- pop %rdx /* Restore original PAT value */
+ mov %r15, %rdx /* Restore original PAT value */
wrmsr
+ pop %r12
+ pop %r15
+
ret
.L__enc_copy_end:
ENDPROC(__enc_copy)
diff --git a/include/linux/crash_core.h b/include/linux/crash_core.h
index 06097ef30449..b511f6d24b42 100644
--- a/include/linux/crash_core.h
+++ b/include/linux/crash_core.h
@@ -42,6 +42,8 @@ phys_addr_t paddr_vmcoreinfo_note(void);
vmcoreinfo_append_str("PAGESIZE=%ld\n", value)
#define VMCOREINFO_SYMBOL(name) \
vmcoreinfo_append_str("SYMBOL(%s)=%lx\n", #name, (unsigned long)&name)
+#define VMCOREINFO_SYMBOL_ARRAY(name) \
+ vmcoreinfo_append_str("SYMBOL(%s)=%lx\n", #name, (unsigned long)name)
#define VMCOREINFO_SIZE(name) \
vmcoreinfo_append_str("SIZE(%s)=%lu\n", #name, \
(unsigned long)sizeof(name))
diff --git a/kernel/crash_core.c b/kernel/crash_core.c
index b3663896278e..4f63597c824d 100644
--- a/kernel/crash_core.c
+++ b/kernel/crash_core.c
@@ -410,7 +410,7 @@ static int __init crash_save_vmcoreinfo_init(void)
VMCOREINFO_SYMBOL(contig_page_data);
#endif
#ifdef CONFIG_SPARSEMEM
- VMCOREINFO_SYMBOL(mem_section);
+ VMCOREINFO_SYMBOL_ARRAY(mem_section);
VMCOREINFO_LENGTH(mem_section, NR_SECTION_ROOTS);
VMCOREINFO_STRUCT_SIZE(mem_section);
VMCOREINFO_OFFSET(mem_section, section_mem_map);
^ permalink raw reply related [flat|nested] 547+ messages in thread
* [GIT PULL] x86 fixes
@ 2018-01-12 13:56 Ingo Molnar
0 siblings, 0 replies; 547+ messages in thread
From: Ingo Molnar @ 2018-01-12 13:56 UTC (permalink / raw)
To: Linus Torvalds
Cc: linux-kernel, Thomas Gleixner, H. Peter Anvin, Peter Zijlstra,
Andrew Morton
Linus,
Please pull the latest x86-urgent-for-linus git tree from:
git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus
# HEAD: 9d0513d82f1a8fe17b41f113ac5922fa57dbaf5c x86/platform/intel-mid: Revert "Make 'bt_sfi_data' const"
There two pending (non-PTI) x86 fixes:
- an Intel-MID crash fix
- and an Intel microcode loader blacklist quirk to avoid a problematic revision.
Thanks,
Ingo
------------------>
Andy Shevchenko (1):
x86/platform/intel-mid: Revert "Make 'bt_sfi_data' const"
Jia Zhang (1):
x86/microcode/intel: Extend BDW late-loading with a revision check
arch/x86/kernel/cpu/microcode/intel.c | 13 +++++++++++--
arch/x86/platform/intel-mid/device_libs/platform_bt.c | 2 +-
2 files changed, 12 insertions(+), 3 deletions(-)
diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c
index 8ccdca6d3f9e..d9e460fc7a3b 100644
--- a/arch/x86/kernel/cpu/microcode/intel.c
+++ b/arch/x86/kernel/cpu/microcode/intel.c
@@ -910,8 +910,17 @@ static bool is_blacklisted(unsigned int cpu)
{
struct cpuinfo_x86 *c = &cpu_data(cpu);
- if (c->x86 == 6 && c->x86_model == INTEL_FAM6_BROADWELL_X) {
- pr_err_once("late loading on model 79 is disabled.\n");
+ /*
+ * Late loading on model 79 with microcode revision less than 0x0b000021
+ * may result in a system hang. This behavior is documented in item
+ * BDF90, #334165 (Intel Xeon Processor E7-8800/4800 v4 Product Family).
+ */
+ if (c->x86 == 6 &&
+ c->x86_model == INTEL_FAM6_BROADWELL_X &&
+ c->x86_mask == 0x01 &&
+ c->microcode < 0x0b000021) {
+ pr_err_once("Erratum BDF90: late loading with revision < 0x0b000021 (0x%x) disabled.\n", c->microcode);
+ pr_err_once("Please consider either early loading through initrd/built-in or a potential BIOS update.\n");
return true;
}
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_bt.c b/arch/x86/platform/intel-mid/device_libs/platform_bt.c
index dc036e511f48..5a0483e7bf66 100644
--- a/arch/x86/platform/intel-mid/device_libs/platform_bt.c
+++ b/arch/x86/platform/intel-mid/device_libs/platform_bt.c
@@ -60,7 +60,7 @@ static int __init tng_bt_sfi_setup(struct bt_sfi_data *ddata)
return 0;
}
-static const struct bt_sfi_data tng_bt_sfi_data __initdata = {
+static struct bt_sfi_data tng_bt_sfi_data __initdata = {
.setup = tng_bt_sfi_setup,
};
^ permalink raw reply related [flat|nested] 547+ messages in thread
* Re: [GIT PULL] x86 fixes
2017-12-17 11:41 ` Thomas Gleixner
@ 2017-12-17 15:15 ` Borislav Petkov
0 siblings, 0 replies; 547+ messages in thread
From: Borislav Petkov @ 2017-12-17 15:15 UTC (permalink / raw)
To: Thomas Gleixner, Andy Lutomirski
Cc: Ingo Molnar, Linus Torvalds, linux-kernel, H. Peter Anvin,
Peter Zijlstra, Andrew Morton, Josh Poimboeuf
On Sun, Dec 17, 2017 at 12:41:42PM +0100, Thomas Gleixner wrote:
> > X86_BUG_CPU_SECURE_MODE_PTI should be added to DISABLED_FEATURES or
> > DISABLED_BUGS or whatever if it's not configured in, which will reduce
> > bloat. Borislav, that's kind of up your alley, since I don't think
> > the appropriate mask even exists right now.
>
> Will have a look.
Right, I converted X86_BUG_CPU_SECURE_MODE_PTI bug bit to an X86_FEATURE
bit and then the DISABLED_FEATURES thing should just work.
--
Regards/Gruss,
Boris.
Good mailing practices for 400: avoid top-posting and trim the reply.
^ permalink raw reply [flat|nested] 547+ messages in thread
* Re: [GIT PULL] x86 fixes
2017-12-17 3:25 ` Andy Lutomirski
2017-12-17 8:32 ` Ingo Molnar
@ 2017-12-17 11:41 ` Thomas Gleixner
2017-12-17 15:15 ` Borislav Petkov
1 sibling, 1 reply; 547+ messages in thread
From: Thomas Gleixner @ 2017-12-17 11:41 UTC (permalink / raw)
To: Andy Lutomirski
Cc: Ingo Molnar, Linus Torvalds, linux-kernel, H. Peter Anvin,
Peter Zijlstra, Andrew Morton, Josh Poimboeuf, Borislav Petkov
On Sat, 16 Dec 2017, Andy Lutomirski wrote:
> On Fri, Dec 15, 2017 at 8:07 AM, Ingo Molnar <mingo@kernel.org> wrote:
> I few things I noticed in the PTI tree:
>
> "x86/mm/pti: Map ESPFIX into user space" has a leftover pr_err().
> Sorry, my bad, I've spent *way* too long looking at this crap to
> retain my sanity. Also, if you're feeling like being super tidy, the
> init/main.c change in their could be folded in to whatever patch adds
> pti_init() in the first place, but it doesn't really matter.
I split it out into a separate patch and got rid of all the ifdef mess
while at it.
> "x86/pti: Map the vsyscall page if needed" has a change to
> pgtable_64.h that could be folded into an earlier patch. This is
> probably my fault for applying Dave Hansen's cleanup request to the
> wrong patch.
Done
> "x86/mm/64: Make a full PGD-entry size hole in the memory map" would
> benefit from a mention of "5-level" somewhere in the subject or
> changelog.
Will do
> In "x86/fixmap: Add debugstore entries to cpu_entry_area", I think the
> function "set_percpu_fixmap_ptes" is misnamed. It should be something
> like "allocate_percpu_fixmap_ptes", perhaps,
Ok.
> and it should either warn
> or do nothing if the PTE is already present, I think. As it stands,
> it's a wee bit dangerous.
Well, it's not more dangerous than all the other fixmap stuff we are
doing. The only difference is that it does not install a mapping, it just
makes sure that the PTE page is populated.
> X86_BUG_CPU_SECURE_MODE_PTI should be added to DISABLED_FEATURES or
> DISABLED_BUGS or whatever if it's not configured in, which will reduce
> bloat. Borislav, that's kind of up your alley, since I don't think
> the appropriate mask even exists right now.
Will have a look.
Thanks,
tglx
^ permalink raw reply [flat|nested] 547+ messages in thread
* Re: [GIT PULL] x86 fixes
2017-12-17 3:25 ` Andy Lutomirski
@ 2017-12-17 8:32 ` Ingo Molnar
2017-12-17 11:41 ` Thomas Gleixner
1 sibling, 0 replies; 547+ messages in thread
From: Ingo Molnar @ 2017-12-17 8:32 UTC (permalink / raw)
To: Andy Lutomirski
Cc: Linus Torvalds, linux-kernel, Thomas Gleixner, H. Peter Anvin,
Peter Zijlstra, Andrew Morton, Josh Poimboeuf, Borislav Petkov
* Andy Lutomirski <luto@kernel.org> wrote:
> > Is this tree looking good to you standalone?
>
> I think this stuff is looking okay, although I'm a bit mystified by
> the whole merge base thing. But if the backporters and Linus like it,
> then whatever.
To explain the merge base thing: considering that v4.14 is an LTS kernel the PTI
merge base thing is really mostly about keeping this commit count low:
triton:~/tip> git log --no-merges --oneline v4.14..tip/WIP.x86/pti.base | wc -l
63
Versus the closest upstream work-alike equivalent base tree, which is, roughly:
triton:~/tip> git log --no-merges --oneline v4.14..99306dfc067e | wc -l
1088
1088 is a lot of commits to backport, all sourced from early in the merge window -
which would also require the identification of ~dozens of random followup fixes
later in the merge window and the -rc process - which fixes might not even
cherry-pick cleanly due to other interactions...
So the upstream price of the 'PTI merge base' is 9 cherry-picks, to avoid
backporting either 1000+ commits to -stable (not workable to -stable folks,
especially as such wide backports also tend to explode exponentially by pulling in
random dependencies as you try to backport them further back) or the non-Git
backporting of 100+ iffy low level x86 entry code commits to the LTS kernel (still
a nightmare to both the -stable and us x86 maintainers).
Note that doing this also allows tip:x86/pti to remain on this almost-v4.14 base
for a few more weeks after an upstream merge, so it can collect any eventual fixes
and minor enhancements into a linear series of commits. This, considering the
complexity of PTI, is good both for bisectability and for backporting.
So as long as the ~9 cherry-picks are cleanly structured and are explicitly
marked, this is the best all around solution we could think of.
( The somewhat weird git-merge gynastics in the tree I sent to Linus are really
about merging specific versions of upstream that are content-equivalent with
the cherry-picks, to avoid massive conflicts. v4.15 had various other changes to
the files affected by the cherry-picking. If done naively the cherry-picked tree
merge can generate over a dozen nasty conflicts. Doing it this way also creates
more confidence in the cherry-picked base tree itself: -stable folks can trust
it more because it will be 'obviously' equivalent to upstream as expressed by
the conflict-free merge. )
I believe the fact that the PTI patches have already been successfully backported
to v4.9, with only minor additional cherry-picking, demonstrates that this is the
right approach.
> I few things I noticed in the PTI tree:
>
> "x86/mm/pti: Map ESPFIX into user space" has a leftover pr_err().
> Sorry, my bad, I've spent *way* too long looking at this crap to
> retain my sanity. Also, if you're feeling like being super tidy, the
> init/main.c change in their could be folded in to whatever patch adds
> pti_init() in the first place, but it doesn't really matter.
>
> "x86/pti: Map the vsyscall page if needed" has a change to
> pgtable_64.h that could be folded into an earlier patch. This is
> probably my fault for applying Dave Hansen's cleanup request to the
> wrong patch.
>
> "x86/mm/64: Make a full PGD-entry size hole in the memory map" would
> benefit from a mention of "5-level" somewhere in the subject or
> changelog.
>
> In "x86/fixmap: Add debugstore entries to cpu_entry_area", I think the
> function "set_percpu_fixmap_ptes" is misnamed. It should be something
> like "allocate_percpu_fixmap_ptes", perhaps, and it should either warn
> or do nothing if the PTE is already present, I think. As it stands,
> it's a wee bit dangerous.
>
> X86_BUG_CPU_SECURE_MODE_PTI should be added to DISABLED_FEATURES or
> DISABLED_BUGS or whatever if it's not configured in, which will reduce
> bloat. Borislav, that's kind of up your alley, since I don't think
> the appropriate mask even exists right now.
>
>
>
> Anyway, I stuck a few minor fixups here:
>
> https://git.kernel.org/pub/scm/linux/kernel/git/luto/linux.git/log/?h=x86/pti
Thanks, we'll integrate all of this!
Also note that regarding PTI LDT handling, our plan is to stick to the review
feedback consensus: i.e. we'll apply your PTI LDT fixes as the initial approach,
plus an optional series later on once PTI is upstream, with the VMA based bits by
Thomas and PeterZ subject to a fresh round of thinking & evaluation. Even if we
decide to do the VMA approach, I don't think that aspect of PTI will be
backported.
This should further simplify logistics and offloads risks as well.
Ingo
^ permalink raw reply [flat|nested] 547+ messages in thread
* Re: [GIT PULL] x86 fixes
2017-12-15 16:07 ` Ingo Molnar
@ 2017-12-17 3:25 ` Andy Lutomirski
2017-12-17 8:32 ` Ingo Molnar
2017-12-17 11:41 ` Thomas Gleixner
0 siblings, 2 replies; 547+ messages in thread
From: Andy Lutomirski @ 2017-12-17 3:25 UTC (permalink / raw)
To: Ingo Molnar
Cc: Andy Lutomirski, Linus Torvalds, linux-kernel, Thomas Gleixner,
H. Peter Anvin, Peter Zijlstra, Andrew Morton, Josh Poimboeuf,
Borislav Petkov
On Fri, Dec 15, 2017 at 8:07 AM, Ingo Molnar <mingo@kernel.org> wrote:
>
> * Andy Lutomirski <luto@kernel.org> wrote:
>
>> On Fri, Dec 15, 2017 at 7:43 AM, Ingo Molnar <mingo@kernel.org> wrote:
>> > Linus,
>>
>> >
>> > - two 5-level paging related fixes
>>
>> Which reminds me: can you grab this one, too?
>>
>> https://lkml.kernel.org/r/24c898b4f44fdf8c22d93703850fb384ef87cfdc.1513035461.git.luto@kernel.org
>
> Yeah, done - it's now in x86/urgent as:
>
> c739f930be1d: x86/espfix/64: Fix espfix double-fault handling on 5-level systems
>
> Will push it out soon. Linus will get it with the next x86/urgent batch, as
> there's no production 5-level paging CPUs out there yet, right?
>
> I'm also picking up your pending PTI fixes/updates over the weekend, but I spent
> today mostly to make sure that the system call trampoline bits and its
> dependencies were robust in practice.
>
> Could you please have a good look at that tree:
>
> git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git tmp.tmp
>
> This includes the preparatory merges and cherry-picks, and some other low risk
> preparatory bits related to PTI.
>
> Is this tree looking good to you standalone?
I think this stuff is looking okay, although I'm a bit mystified by
the whole merge base thing. But if the backporters and Linus like it,
then whatever.
I few things I noticed in the PTI tree:
"x86/mm/pti: Map ESPFIX into user space" has a leftover pr_err().
Sorry, my bad, I've spent *way* too long looking at this crap to
retain my sanity. Also, if you're feeling like being super tidy, the
init/main.c change in their could be folded in to whatever patch adds
pti_init() in the first place, but it doesn't really matter.
"x86/pti: Map the vsyscall page if needed" has a change to
pgtable_64.h that could be folded into an earlier patch. This is
probably my fault for applying Dave Hansen's cleanup request to the
wrong patch.
"x86/mm/64: Make a full PGD-entry size hole in the memory map" would
benefit from a mention of "5-level" somewhere in the subject or
changelog.
In "x86/fixmap: Add debugstore entries to cpu_entry_area", I think the
function "set_percpu_fixmap_ptes" is misnamed. It should be something
like "allocate_percpu_fixmap_ptes", perhaps, and it should either warn
or do nothing if the PTE is already present, I think. As it stands,
it's a wee bit dangerous.
X86_BUG_CPU_SECURE_MODE_PTI should be added to DISABLED_FEATURES or
DISABLED_BUGS or whatever if it's not configured in, which will reduce
bloat. Borislav, that's kind of up your alley, since I don't think
the appropriate mask even exists right now.
Anyway, I stuck a few minor fixups here:
https://git.kernel.org/pub/scm/linux/kernel/git/luto/linux.git/log/?h=x86/pti
^ permalink raw reply [flat|nested] 547+ messages in thread
* Re: [GIT PULL] x86 fixes
2017-12-15 15:50 ` Andy Lutomirski
@ 2017-12-15 16:07 ` Ingo Molnar
2017-12-17 3:25 ` Andy Lutomirski
0 siblings, 1 reply; 547+ messages in thread
From: Ingo Molnar @ 2017-12-15 16:07 UTC (permalink / raw)
To: Andy Lutomirski
Cc: Linus Torvalds, linux-kernel, Thomas Gleixner, H. Peter Anvin,
Peter Zijlstra, Andrew Morton, Josh Poimboeuf, Borislav Petkov
* Andy Lutomirski <luto@kernel.org> wrote:
> On Fri, Dec 15, 2017 at 7:43 AM, Ingo Molnar <mingo@kernel.org> wrote:
> > Linus,
>
> >
> > - two 5-level paging related fixes
>
> Which reminds me: can you grab this one, too?
>
> https://lkml.kernel.org/r/24c898b4f44fdf8c22d93703850fb384ef87cfdc.1513035461.git.luto@kernel.org
Yeah, done - it's now in x86/urgent as:
c739f930be1d: x86/espfix/64: Fix espfix double-fault handling on 5-level systems
Will push it out soon. Linus will get it with the next x86/urgent batch, as
there's no production 5-level paging CPUs out there yet, right?
I'm also picking up your pending PTI fixes/updates over the weekend, but I spent
today mostly to make sure that the system call trampoline bits and its
dependencies were robust in practice.
Could you please have a good look at that tree:
git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git tmp.tmp
This includes the preparatory merges and cherry-picks, and some other low risk
preparatory bits related to PTI.
Is this tree looking good to you standalone?
Note that the lack of runtime patching on non-Intel systems by default is
intentionally not included at this stage, so that it's all simpler, and that we
get more test coverage...
This tree is what I've been testing all day, including suspend/resume testing, and
which I wanted to send to Linus later today as an RFC pull request, assuming
there's no last minute showstoppers. (Right after I've seen the new Star Wars
movie with my son - because priorities! ;-)
Thanks,
Ingo
^ permalink raw reply [flat|nested] 547+ messages in thread
* Re: [GIT PULL] x86 fixes
2017-12-15 15:43 Ingo Molnar
@ 2017-12-15 15:50 ` Andy Lutomirski
2017-12-15 16:07 ` Ingo Molnar
0 siblings, 1 reply; 547+ messages in thread
From: Andy Lutomirski @ 2017-12-15 15:50 UTC (permalink / raw)
To: Ingo Molnar
Cc: Linus Torvalds, linux-kernel, Thomas Gleixner, H. Peter Anvin,
Peter Zijlstra, Andrew Morton, Josh Poimboeuf, Andy Lutomirski,
Borislav Petkov
On Fri, Dec 15, 2017 at 7:43 AM, Ingo Molnar <mingo@kernel.org> wrote:
> Linus,
>
> - two 5-level paging related fixes
Which reminds me: can you grab this one, too?
https://lkml.kernel.org/r/24c898b4f44fdf8c22d93703850fb384ef87cfdc.1513035461.git.luto@kernel.org
^ permalink raw reply [flat|nested] 547+ messages in thread
* [GIT PULL] x86 fixes
@ 2017-12-15 15:43 Ingo Molnar
2017-12-15 15:50 ` Andy Lutomirski
0 siblings, 1 reply; 547+ messages in thread
From: Ingo Molnar @ 2017-12-15 15:43 UTC (permalink / raw)
To: Linus Torvalds
Cc: linux-kernel, Thomas Gleixner, H. Peter Anvin, Peter Zijlstra,
Andrew Morton, Josh Poimboeuf, Andy Lutomirski, Borislav Petkov
Linus,
Please pull the latest x86-urgent-for-linus git tree from:
git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus
# HEAD: 215eada73e77ede7e15531d99f712481ddd429be objtool: Resync objtool's instruction decoder source code copy with the kernel's latest version
Misc fixes:
- Fix the s2ram regression related to confusion around segment register
restoration, plus related cleanups that make the code more robust
- a guess-unwinder Kconfig dependency fix
- an isoimage build target fix for certain tool chain combinations
- instruction decoder opcode map fixes+updates, and the syncing of the kernel
decoder headers to the objtool headers
- a kmmio tracing fix
- two 5-level paging related fixes
- a topology enumeration fix on certain SMP systems
Thanks,
Ingo
------------------>
Andrey Ryabinin (1):
x86/unwinder/guess: Prevent using CONFIG_UNWINDER_GUESS=y with CONFIG_STACKDEPOT=y
Andy Lutomirski (3):
x86/power/64: Use struct desc_ptr for the IDT in struct saved_context
x86/power/32: Move SYSENTER MSR restoration to fix_processor_context()
x86/power: Make restore_processor_context() sane
Changbin Du (1):
x86/build: Don't verify mtools configuration file for isoimage
Ingo Molnar (1):
objtool: Resync objtool's instruction decoder source code copy with the kernel's latest version
Karol Herbst (1):
x86/mm/kmmio: Fix mmiotrace for page unaligned addresses
Kirill A. Shutemov (2):
x86/boot/compressed/64: Detect and handle 5-level paging at boot-time
x86/boot/compressed/64: Print error if 5-level paging is not supported
Prarit Bhargava (1):
x86/smpboot: Do not use smp_num_siblings in __max_logical_packages calculation
Randy Dunlap (1):
x86/decoder: Fix and update the opcodes map
arch/x86/Kconfig.debug | 1 +
arch/x86/boot/compressed/Makefile | 1 +
arch/x86/boot/compressed/head_64.S | 16 +++-
arch/x86/boot/compressed/misc.c | 16 ++++
arch/x86/boot/compressed/pgtable_64.c | 28 ++++++
arch/x86/boot/genimage.sh | 4 +-
arch/x86/include/asm/suspend_32.h | 8 +-
arch/x86/include/asm/suspend_64.h | 19 ++++-
arch/x86/kernel/smpboot.c | 4 +-
arch/x86/lib/x86-opcode-map.txt | 13 ++-
arch/x86/mm/ioremap.c | 4 +-
arch/x86/mm/kmmio.c | 12 +--
arch/x86/power/cpu.c | 99 ++++++++++------------
tools/objtool/arch/x86/insn/inat.h | 10 +++
tools/objtool/arch/x86/insn/x86-opcode-map.txt | 15 +++-
.../perf/util/intel-pt-decoder/x86-opcode-map.txt | 15 +++-
16 files changed, 184 insertions(+), 81 deletions(-)
create mode 100644 arch/x86/boot/compressed/pgtable_64.c
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index 6293a8768a91..672441c008c7 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -400,6 +400,7 @@ config UNWINDER_FRAME_POINTER
config UNWINDER_GUESS
bool "Guess unwinder"
depends on EXPERT
+ depends on !STACKDEPOT
---help---
This option enables the "guess" unwinder for unwinding kernel stack
traces. It scans the stack and reports every kernel text address it
diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index 1e9c322e973a..f25e1530e064 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -80,6 +80,7 @@ vmlinux-objs-$(CONFIG_RANDOMIZE_BASE) += $(obj)/kaslr.o
ifdef CONFIG_X86_64
vmlinux-objs-$(CONFIG_RANDOMIZE_BASE) += $(obj)/pagetable.o
vmlinux-objs-y += $(obj)/mem_encrypt.o
+ vmlinux-objs-y += $(obj)/pgtable_64.o
endif
$(obj)/eboot.o: KBUILD_CFLAGS += -fshort-wchar -mno-red-zone
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index 20919b4f3133..fc313e29fe2c 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -305,10 +305,18 @@ ENTRY(startup_64)
leaq boot_stack_end(%rbx), %rsp
#ifdef CONFIG_X86_5LEVEL
- /* Check if 5-level paging has already enabled */
- movq %cr4, %rax
- testl $X86_CR4_LA57, %eax
- jnz lvl5
+ /*
+ * Check if we need to enable 5-level paging.
+ * RSI holds real mode data and need to be preserved across
+ * a function call.
+ */
+ pushq %rsi
+ call l5_paging_required
+ popq %rsi
+
+ /* If l5_paging_required() returned zero, we're done here. */
+ cmpq $0, %rax
+ je lvl5
/*
* At this point we are in long mode with 4-level paging enabled,
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index b50c42455e25..98761a1576ce 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -169,6 +169,16 @@ void __puthex(unsigned long value)
}
}
+static bool l5_supported(void)
+{
+ /* Check if leaf 7 is supported. */
+ if (native_cpuid_eax(0) < 7)
+ return 0;
+
+ /* Check if la57 is supported. */
+ return native_cpuid_ecx(7) & (1 << (X86_FEATURE_LA57 & 31));
+}
+
#if CONFIG_X86_NEED_RELOCS
static void handle_relocations(void *output, unsigned long output_len,
unsigned long virt_addr)
@@ -362,6 +372,12 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap,
console_init();
debug_putstr("early console in extract_kernel\n");
+ if (IS_ENABLED(CONFIG_X86_5LEVEL) && !l5_supported()) {
+ error("This linux kernel as configured requires 5-level paging\n"
+ "This CPU does not support the required 'cr4.la57' feature\n"
+ "Unable to boot - please use a kernel appropriate for your CPU\n");
+ }
+
free_mem_ptr = heap; /* Heap */
free_mem_end_ptr = heap + BOOT_HEAP_SIZE;
diff --git a/arch/x86/boot/compressed/pgtable_64.c b/arch/x86/boot/compressed/pgtable_64.c
new file mode 100644
index 000000000000..b4469a37e9a1
--- /dev/null
+++ b/arch/x86/boot/compressed/pgtable_64.c
@@ -0,0 +1,28 @@
+#include <asm/processor.h>
+
+/*
+ * __force_order is used by special_insns.h asm code to force instruction
+ * serialization.
+ *
+ * It is not referenced from the code, but GCC < 5 with -fPIE would fail
+ * due to an undefined symbol. Define it to make these ancient GCCs work.
+ */
+unsigned long __force_order;
+
+int l5_paging_required(void)
+{
+ /* Check if leaf 7 is supported. */
+
+ if (native_cpuid_eax(0) < 7)
+ return 0;
+
+ /* Check if la57 is supported. */
+ if (!(native_cpuid_ecx(7) & (1 << (X86_FEATURE_LA57 & 31))))
+ return 0;
+
+ /* Check if 5-level paging has already been enabled. */
+ if (native_read_cr4() & X86_CR4_LA57)
+ return 0;
+
+ return 1;
+}
diff --git a/arch/x86/boot/genimage.sh b/arch/x86/boot/genimage.sh
index 49f4970f693b..c9e8499fbfe7 100644
--- a/arch/x86/boot/genimage.sh
+++ b/arch/x86/boot/genimage.sh
@@ -44,9 +44,9 @@ FDINITRD=$6
# Make sure the files actually exist
verify "$FBZIMAGE"
-verify "$MTOOLSRC"
genbzdisk() {
+ verify "$MTOOLSRC"
mformat a:
syslinux $FIMAGE
echo "$KCMDLINE" | mcopy - a:syslinux.cfg
@@ -57,6 +57,7 @@ genbzdisk() {
}
genfdimage144() {
+ verify "$MTOOLSRC"
dd if=/dev/zero of=$FIMAGE bs=1024 count=1440 2> /dev/null
mformat v:
syslinux $FIMAGE
@@ -68,6 +69,7 @@ genfdimage144() {
}
genfdimage288() {
+ verify "$MTOOLSRC"
dd if=/dev/zero of=$FIMAGE bs=1024 count=2880 2> /dev/null
mformat w:
syslinux $FIMAGE
diff --git a/arch/x86/include/asm/suspend_32.h b/arch/x86/include/asm/suspend_32.h
index 982c325dad33..8be6afb58471 100644
--- a/arch/x86/include/asm/suspend_32.h
+++ b/arch/x86/include/asm/suspend_32.h
@@ -12,7 +12,13 @@
/* image of the saved processor state */
struct saved_context {
- u16 es, fs, gs, ss;
+ /*
+ * On x86_32, all segment registers, with the possible exception of
+ * gs, are saved at kernel entry in pt_regs.
+ */
+#ifdef CONFIG_X86_32_LAZY_GS
+ u16 gs;
+#endif
unsigned long cr0, cr2, cr3, cr4;
u64 misc_enable;
bool misc_enable_saved;
diff --git a/arch/x86/include/asm/suspend_64.h b/arch/x86/include/asm/suspend_64.h
index 7306e911faee..a7af9f53c0cb 100644
--- a/arch/x86/include/asm/suspend_64.h
+++ b/arch/x86/include/asm/suspend_64.h
@@ -20,8 +20,20 @@
*/
struct saved_context {
struct pt_regs regs;
- u16 ds, es, fs, gs, ss;
- unsigned long gs_base, gs_kernel_base, fs_base;
+
+ /*
+ * User CS and SS are saved in current_pt_regs(). The rest of the
+ * segment selectors need to be saved and restored here.
+ */
+ u16 ds, es, fs, gs;
+
+ /*
+ * Usermode FSBASE and GSBASE may not match the fs and gs selectors,
+ * so we save them separately. We save the kernelmode GSBASE to
+ * restore percpu access after resume.
+ */
+ unsigned long kernelmode_gs_base, usermode_gs_base, fs_base;
+
unsigned long cr0, cr2, cr3, cr4, cr8;
u64 misc_enable;
bool misc_enable_saved;
@@ -30,8 +42,7 @@ struct saved_context {
u16 gdt_pad; /* Unused */
struct desc_ptr gdt_desc;
u16 idt_pad;
- u16 idt_limit;
- unsigned long idt_base;
+ struct desc_ptr idt;
u16 ldt;
u16 tss;
unsigned long tr;
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 05a97d5fe298..35cb20994e32 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -106,7 +106,7 @@ EXPORT_SYMBOL(__max_logical_packages);
static unsigned int logical_packages __read_mostly;
/* Maximum number of SMT threads on any online core */
-int __max_smt_threads __read_mostly;
+int __read_mostly __max_smt_threads = 1;
/* Flag to indicate if a complete sched domain rebuild is required */
bool x86_topology_update;
@@ -1304,7 +1304,7 @@ void __init native_smp_cpus_done(unsigned int max_cpus)
* Today neither Intel nor AMD support heterogenous systems so
* extrapolate the boot cpu's data to all packages.
*/
- ncpus = cpu_data(0).booted_cores * smp_num_siblings;
+ ncpus = cpu_data(0).booted_cores * topology_max_smt_threads();
__max_logical_packages = DIV_ROUND_UP(nr_cpu_ids, ncpus);
pr_info("Max logical packages: %u\n", __max_logical_packages);
diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt
index c4d55919fac1..e0b85930dd77 100644
--- a/arch/x86/lib/x86-opcode-map.txt
+++ b/arch/x86/lib/x86-opcode-map.txt
@@ -607,7 +607,7 @@ fb: psubq Pq,Qq | vpsubq Vx,Hx,Wx (66),(v1)
fc: paddb Pq,Qq | vpaddb Vx,Hx,Wx (66),(v1)
fd: paddw Pq,Qq | vpaddw Vx,Hx,Wx (66),(v1)
fe: paddd Pq,Qq | vpaddd Vx,Hx,Wx (66),(v1)
-ff:
+ff: UD0
EndTable
Table: 3-byte opcode 1 (0x0f 0x38)
@@ -717,7 +717,7 @@ AVXcode: 2
7e: vpermt2d/q Vx,Hx,Wx (66),(ev)
7f: vpermt2ps/d Vx,Hx,Wx (66),(ev)
80: INVEPT Gy,Mdq (66)
-81: INVPID Gy,Mdq (66)
+81: INVVPID Gy,Mdq (66)
82: INVPCID Gy,Mdq (66)
83: vpmultishiftqb Vx,Hx,Wx (66),(ev)
88: vexpandps/d Vpd,Wpd (66),(ev)
@@ -970,6 +970,15 @@ GrpTable: Grp9
EndTable
GrpTable: Grp10
+# all are UD1
+0: UD1
+1: UD1
+2: UD1
+3: UD1
+4: UD1
+5: UD1
+6: UD1
+7: UD1
EndTable
# Grp11A and Grp11B are expressed as Grp11 in Intel SDM
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index 6e4573b1da34..c45b6ec5357b 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -404,11 +404,11 @@ void iounmap(volatile void __iomem *addr)
return;
}
+ mmiotrace_iounmap(addr);
+
addr = (volatile void __iomem *)
(PAGE_MASK & (unsigned long __force)addr);
- mmiotrace_iounmap(addr);
-
/* Use the vm area unlocked, assuming the caller
ensures there isn't another iounmap for the same address
in parallel. Reuse of the virtual address is prevented by
diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c
index c21c2ed04612..58477ec3d66d 100644
--- a/arch/x86/mm/kmmio.c
+++ b/arch/x86/mm/kmmio.c
@@ -435,17 +435,18 @@ int register_kmmio_probe(struct kmmio_probe *p)
unsigned long flags;
int ret = 0;
unsigned long size = 0;
+ unsigned long addr = p->addr & PAGE_MASK;
const unsigned long size_lim = p->len + (p->addr & ~PAGE_MASK);
unsigned int l;
pte_t *pte;
spin_lock_irqsave(&kmmio_lock, flags);
- if (get_kmmio_probe(p->addr)) {
+ if (get_kmmio_probe(addr)) {
ret = -EEXIST;
goto out;
}
- pte = lookup_address(p->addr, &l);
+ pte = lookup_address(addr, &l);
if (!pte) {
ret = -EINVAL;
goto out;
@@ -454,7 +455,7 @@ int register_kmmio_probe(struct kmmio_probe *p)
kmmio_count++;
list_add_rcu(&p->list, &kmmio_probes);
while (size < size_lim) {
- if (add_kmmio_fault_page(p->addr + size))
+ if (add_kmmio_fault_page(addr + size))
pr_err("Unable to set page fault.\n");
size += page_level_size(l);
}
@@ -528,19 +529,20 @@ void unregister_kmmio_probe(struct kmmio_probe *p)
{
unsigned long flags;
unsigned long size = 0;
+ unsigned long addr = p->addr & PAGE_MASK;
const unsigned long size_lim = p->len + (p->addr & ~PAGE_MASK);
struct kmmio_fault_page *release_list = NULL;
struct kmmio_delayed_release *drelease;
unsigned int l;
pte_t *pte;
- pte = lookup_address(p->addr, &l);
+ pte = lookup_address(addr, &l);
if (!pte)
return;
spin_lock_irqsave(&kmmio_lock, flags);
while (size < size_lim) {
- release_kmmio_fault_page(p->addr + size, &release_list);
+ release_kmmio_fault_page(addr + size, &release_list);
size += page_level_size(l);
}
list_del_rcu(&p->list);
diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c
index 5191de14f4df..36a28eddb435 100644
--- a/arch/x86/power/cpu.c
+++ b/arch/x86/power/cpu.c
@@ -82,12 +82,8 @@ static void __save_processor_state(struct saved_context *ctxt)
/*
* descriptor tables
*/
-#ifdef CONFIG_X86_32
store_idt(&ctxt->idt);
-#else
-/* CONFIG_X86_64 */
- store_idt((struct desc_ptr *)&ctxt->idt_limit);
-#endif
+
/*
* We save it here, but restore it only in the hibernate case.
* For ACPI S3 resume, this is loaded via 'early_gdt_desc' in 64-bit
@@ -103,22 +99,18 @@ static void __save_processor_state(struct saved_context *ctxt)
/*
* segment registers
*/
-#ifdef CONFIG_X86_32
- savesegment(es, ctxt->es);
- savesegment(fs, ctxt->fs);
+#ifdef CONFIG_X86_32_LAZY_GS
savesegment(gs, ctxt->gs);
- savesegment(ss, ctxt->ss);
-#else
-/* CONFIG_X86_64 */
- asm volatile ("movw %%ds, %0" : "=m" (ctxt->ds));
- asm volatile ("movw %%es, %0" : "=m" (ctxt->es));
- asm volatile ("movw %%fs, %0" : "=m" (ctxt->fs));
- asm volatile ("movw %%gs, %0" : "=m" (ctxt->gs));
- asm volatile ("movw %%ss, %0" : "=m" (ctxt->ss));
+#endif
+#ifdef CONFIG_X86_64
+ savesegment(gs, ctxt->gs);
+ savesegment(fs, ctxt->fs);
+ savesegment(ds, ctxt->ds);
+ savesegment(es, ctxt->es);
rdmsrl(MSR_FS_BASE, ctxt->fs_base);
- rdmsrl(MSR_GS_BASE, ctxt->gs_base);
- rdmsrl(MSR_KERNEL_GS_BASE, ctxt->gs_kernel_base);
+ rdmsrl(MSR_GS_BASE, ctxt->kernelmode_gs_base);
+ rdmsrl(MSR_KERNEL_GS_BASE, ctxt->usermode_gs_base);
mtrr_save_fixed_ranges(NULL);
rdmsrl(MSR_EFER, ctxt->efer);
@@ -178,6 +170,9 @@ static void fix_processor_context(void)
write_gdt_entry(desc, GDT_ENTRY_TSS, &tss, DESC_TSS);
syscall_init(); /* This sets MSR_*STAR and related */
+#else
+ if (boot_cpu_has(X86_FEATURE_SEP))
+ enable_sep_cpu();
#endif
load_TR_desc(); /* This does ltr */
load_mm_ldt(current->active_mm); /* This does lldt */
@@ -190,9 +185,12 @@ static void fix_processor_context(void)
}
/**
- * __restore_processor_state - restore the contents of CPU registers saved
- * by __save_processor_state()
- * @ctxt - structure to load the registers contents from
+ * __restore_processor_state - restore the contents of CPU registers saved
+ * by __save_processor_state()
+ * @ctxt - structure to load the registers contents from
+ *
+ * The asm code that gets us here will have restored a usable GDT, although
+ * it will be pointing to the wrong alias.
*/
static void notrace __restore_processor_state(struct saved_context *ctxt)
{
@@ -215,57 +213,50 @@ static void notrace __restore_processor_state(struct saved_context *ctxt)
write_cr2(ctxt->cr2);
write_cr0(ctxt->cr0);
+ /* Restore the IDT. */
+ load_idt(&ctxt->idt);
+
/*
- * now restore the descriptor tables to their proper values
- * ltr is done i fix_processor_context().
+ * Just in case the asm code got us here with the SS, DS, or ES
+ * out of sync with the GDT, update them.
*/
-#ifdef CONFIG_X86_32
- load_idt(&ctxt->idt);
-#else
-/* CONFIG_X86_64 */
- load_idt((const struct desc_ptr *)&ctxt->idt_limit);
-#endif
+ loadsegment(ss, __KERNEL_DS);
+ loadsegment(ds, __USER_DS);
+ loadsegment(es, __USER_DS);
-#ifdef CONFIG_X86_64
/*
- * We need GSBASE restored before percpu access can work.
- * percpu access can happen in exception handlers or in complicated
- * helpers like load_gs_index().
+ * Restore percpu access. Percpu access can happen in exception
+ * handlers or in complicated helpers like load_gs_index().
*/
- wrmsrl(MSR_GS_BASE, ctxt->gs_base);
+#ifdef CONFIG_X86_64
+ wrmsrl(MSR_GS_BASE, ctxt->kernelmode_gs_base);
+#else
+ loadsegment(fs, __KERNEL_PERCPU);
+ loadsegment(gs, __KERNEL_STACK_CANARY);
#endif
+ /* Restore the TSS, RO GDT, LDT, and usermode-relevant MSRs. */
fix_processor_context();
/*
- * Restore segment registers. This happens after restoring the GDT
- * and LDT, which happen in fix_processor_context().
+ * Now that we have descriptor tables fully restored and working
+ * exception handling, restore the usermode segments.
*/
-#ifdef CONFIG_X86_32
+#ifdef CONFIG_X86_64
+ loadsegment(ds, ctxt->es);
loadsegment(es, ctxt->es);
loadsegment(fs, ctxt->fs);
- loadsegment(gs, ctxt->gs);
- loadsegment(ss, ctxt->ss);
-
- /*
- * sysenter MSRs
- */
- if (boot_cpu_has(X86_FEATURE_SEP))
- enable_sep_cpu();
-#else
-/* CONFIG_X86_64 */
- asm volatile ("movw %0, %%ds" :: "r" (ctxt->ds));
- asm volatile ("movw %0, %%es" :: "r" (ctxt->es));
- asm volatile ("movw %0, %%fs" :: "r" (ctxt->fs));
load_gs_index(ctxt->gs);
- asm volatile ("movw %0, %%ss" :: "r" (ctxt->ss));
/*
- * Restore FSBASE and user GSBASE after reloading the respective
- * segment selectors.
+ * Restore FSBASE and GSBASE after restoring the selectors, since
+ * restoring the selectors clobbers the bases. Keep in mind
+ * that MSR_KERNEL_GS_BASE is horribly misnamed.
*/
wrmsrl(MSR_FS_BASE, ctxt->fs_base);
- wrmsrl(MSR_KERNEL_GS_BASE, ctxt->gs_kernel_base);
+ wrmsrl(MSR_KERNEL_GS_BASE, ctxt->usermode_gs_base);
+#elif defined(CONFIG_X86_32_LAZY_GS)
+ loadsegment(gs, ctxt->gs);
#endif
do_fpu_end();
diff --git a/tools/objtool/arch/x86/insn/inat.h b/tools/objtool/arch/x86/insn/inat.h
index 125ecd2a300d..52dc8d911173 100644
--- a/tools/objtool/arch/x86/insn/inat.h
+++ b/tools/objtool/arch/x86/insn/inat.h
@@ -97,6 +97,16 @@
#define INAT_MAKE_GROUP(grp) ((grp << INAT_GRP_OFFS) | INAT_MODRM)
#define INAT_MAKE_IMM(imm) (imm << INAT_IMM_OFFS)
+/* Identifiers for segment registers */
+#define INAT_SEG_REG_IGNORE 0
+#define INAT_SEG_REG_DEFAULT 1
+#define INAT_SEG_REG_CS 2
+#define INAT_SEG_REG_SS 3
+#define INAT_SEG_REG_DS 4
+#define INAT_SEG_REG_ES 5
+#define INAT_SEG_REG_FS 6
+#define INAT_SEG_REG_GS 7
+
/* Attribute search APIs */
extern insn_attr_t inat_get_opcode_attribute(insn_byte_t opcode);
extern int inat_get_last_prefix_id(insn_byte_t last_pfx);
diff --git a/tools/objtool/arch/x86/insn/x86-opcode-map.txt b/tools/objtool/arch/x86/insn/x86-opcode-map.txt
index 12e377184ee4..e0b85930dd77 100644
--- a/tools/objtool/arch/x86/insn/x86-opcode-map.txt
+++ b/tools/objtool/arch/x86/insn/x86-opcode-map.txt
@@ -607,7 +607,7 @@ fb: psubq Pq,Qq | vpsubq Vx,Hx,Wx (66),(v1)
fc: paddb Pq,Qq | vpaddb Vx,Hx,Wx (66),(v1)
fd: paddw Pq,Qq | vpaddw Vx,Hx,Wx (66),(v1)
fe: paddd Pq,Qq | vpaddd Vx,Hx,Wx (66),(v1)
-ff:
+ff: UD0
EndTable
Table: 3-byte opcode 1 (0x0f 0x38)
@@ -717,7 +717,7 @@ AVXcode: 2
7e: vpermt2d/q Vx,Hx,Wx (66),(ev)
7f: vpermt2ps/d Vx,Hx,Wx (66),(ev)
80: INVEPT Gy,Mdq (66)
-81: INVPID Gy,Mdq (66)
+81: INVVPID Gy,Mdq (66)
82: INVPCID Gy,Mdq (66)
83: vpmultishiftqb Vx,Hx,Wx (66),(ev)
88: vexpandps/d Vpd,Wpd (66),(ev)
@@ -896,7 +896,7 @@ EndTable
GrpTable: Grp3_1
0: TEST Eb,Ib
-1:
+1: TEST Eb,Ib
2: NOT Eb
3: NEG Eb
4: MUL AL,Eb
@@ -970,6 +970,15 @@ GrpTable: Grp9
EndTable
GrpTable: Grp10
+# all are UD1
+0: UD1
+1: UD1
+2: UD1
+3: UD1
+4: UD1
+5: UD1
+6: UD1
+7: UD1
EndTable
# Grp11A and Grp11B are expressed as Grp11 in Intel SDM
diff --git a/tools/perf/util/intel-pt-decoder/x86-opcode-map.txt b/tools/perf/util/intel-pt-decoder/x86-opcode-map.txt
index 12e377184ee4..e0b85930dd77 100644
--- a/tools/perf/util/intel-pt-decoder/x86-opcode-map.txt
+++ b/tools/perf/util/intel-pt-decoder/x86-opcode-map.txt
@@ -607,7 +607,7 @@ fb: psubq Pq,Qq | vpsubq Vx,Hx,Wx (66),(v1)
fc: paddb Pq,Qq | vpaddb Vx,Hx,Wx (66),(v1)
fd: paddw Pq,Qq | vpaddw Vx,Hx,Wx (66),(v1)
fe: paddd Pq,Qq | vpaddd Vx,Hx,Wx (66),(v1)
-ff:
+ff: UD0
EndTable
Table: 3-byte opcode 1 (0x0f 0x38)
@@ -717,7 +717,7 @@ AVXcode: 2
7e: vpermt2d/q Vx,Hx,Wx (66),(ev)
7f: vpermt2ps/d Vx,Hx,Wx (66),(ev)
80: INVEPT Gy,Mdq (66)
-81: INVPID Gy,Mdq (66)
+81: INVVPID Gy,Mdq (66)
82: INVPCID Gy,Mdq (66)
83: vpmultishiftqb Vx,Hx,Wx (66),(ev)
88: vexpandps/d Vpd,Wpd (66),(ev)
@@ -896,7 +896,7 @@ EndTable
GrpTable: Grp3_1
0: TEST Eb,Ib
-1:
+1: TEST Eb,Ib
2: NOT Eb
3: NEG Eb
4: MUL AL,Eb
@@ -970,6 +970,15 @@ GrpTable: Grp9
EndTable
GrpTable: Grp10
+# all are UD1
+0: UD1
+1: UD1
+2: UD1
+3: UD1
+4: UD1
+5: UD1
+6: UD1
+7: UD1
EndTable
# Grp11A and Grp11B are expressed as Grp11 in Intel SDM
^ permalink raw reply related [flat|nested] 547+ messages in thread
* [GIT PULL] x86 fixes
@ 2017-12-06 22:36 Ingo Molnar
0 siblings, 0 replies; 547+ messages in thread
From: Ingo Molnar @ 2017-12-06 22:36 UTC (permalink / raw)
To: Linus Torvalds
Cc: linux-kernel, Thomas Gleixner, H. Peter Anvin, Peter Zijlstra,
Borislav Petkov, Andrew Morton
Linus,
Please pull the latest x86-urgent-for-linus git tree from:
git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus
# HEAD: 88edb57d1e0b262e669c5cad36646dcf5a7f37f5 x86/vdso: Change time() prototype to match __vdso_time()
Misc fixes:
- Make CR4 handling irq-safe, which bug vmware guests ran into
- Don't crash on early IRQs in Xen guests
- Don't crash secondary CPU bringup if #UD assisted WARN()ings are triggered
- Make X86_BUG_FXSAVE_LEAK optional on newer AMD CPUs that have the fix
- Fix AMD Fam17h microcode loading
- Fix broadcom_postcore_init() if ACPI is disabled
- Fix resume regression in __restore_processor_context()
- Fix Sparse warnings
- Fix a GCC-8 warning
Thanks,
Ingo
------------------>
Andy Lutomirski (1):
x86/power: Fix some ordering bugs in __restore_processor_context()
Arnd Bergmann (1):
x86/vdso: Change time() prototype to match __vdso_time()
Chunyu Hu (1):
x86/idt: Load idt early in start_secondary
Colin Ian King (1):
x86: Fix Sparse warnings about non-static functions
Juergen Gross (1):
x86/xen: Support early interrupts in xen pv guests
Nadav Amit (2):
x86/tlb: Refactor CR4 setting and shadow write
x86/tlb: Disable interrupts when changing CR4
Rafael J. Wysocki (1):
x86/PCI: Make broadcom_postcore_init() check acpi_disabled
Rudolf Marek (1):
x86/cpufeatures: Make X86_BUG_FXSAVE_LEAK detectable in CPUID on AMD
Tom Lendacky (1):
x86/microcode/AMD: Add support for fam17h microcode loading
arch/x86/entry/vdso/vclock_gettime.c | 2 +-
arch/x86/include/asm/cpufeatures.h | 1 +
arch/x86/include/asm/segment.h | 12 ++++++++++++
arch/x86/include/asm/tlbflush.h | 35 ++++++++++++++++++----------------
arch/x86/kernel/apic/vector.c | 4 ++--
arch/x86/kernel/cpu/amd.c | 7 +++++--
arch/x86/kernel/cpu/microcode/amd.c | 4 ++++
arch/x86/kernel/process.c | 2 +-
arch/x86/kernel/smpboot.c | 2 +-
arch/x86/mm/extable.c | 4 +++-
arch/x86/pci/broadcom_bus.c | 2 +-
arch/x86/platform/uv/uv_nmi.c | 4 ++--
arch/x86/power/cpu.c | 21 ++++++++++++++++----
arch/x86/xen/enlighten_pv.c | 37 +++++++++++++++++++++++-------------
arch/x86/xen/xen-asm_64.S | 14 ++++++++++++++
15 files changed, 107 insertions(+), 44 deletions(-)
diff --git a/arch/x86/entry/vdso/vclock_gettime.c b/arch/x86/entry/vdso/vclock_gettime.c
index 11b13c4b43d5..f19856d95c60 100644
--- a/arch/x86/entry/vdso/vclock_gettime.c
+++ b/arch/x86/entry/vdso/vclock_gettime.c
@@ -324,5 +324,5 @@ notrace time_t __vdso_time(time_t *t)
*t = result;
return result;
}
-int time(time_t *t)
+time_t time(time_t *t)
__attribute__((weak, alias("__vdso_time")));
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index c0b0e9e8aa66..800104c8a3ed 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -266,6 +266,7 @@
/* AMD-defined CPU features, CPUID level 0x80000008 (EBX), word 13 */
#define X86_FEATURE_CLZERO (13*32+ 0) /* CLZERO instruction */
#define X86_FEATURE_IRPERF (13*32+ 1) /* Instructions Retired Count */
+#define X86_FEATURE_XSAVEERPTR (13*32+ 2) /* Always save/restore FP error pointers */
/* Thermal and Power Management Leaf, CPUID level 0x00000006 (EAX), word 14 */
#define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */
diff --git a/arch/x86/include/asm/segment.h b/arch/x86/include/asm/segment.h
index b20f9d623f9c..8f09012b92e7 100644
--- a/arch/x86/include/asm/segment.h
+++ b/arch/x86/include/asm/segment.h
@@ -236,11 +236,23 @@
*/
#define EARLY_IDT_HANDLER_SIZE 9
+/*
+ * xen_early_idt_handler_array is for Xen pv guests: for each entry in
+ * early_idt_handler_array it contains a prequel in the form of
+ * pop %rcx; pop %r11; jmp early_idt_handler_array[i]; summing up to
+ * max 8 bytes.
+ */
+#define XEN_EARLY_IDT_HANDLER_SIZE 8
+
#ifndef __ASSEMBLY__
extern const char early_idt_handler_array[NUM_EXCEPTION_VECTORS][EARLY_IDT_HANDLER_SIZE];
extern void early_ignore_irq(void);
+#if defined(CONFIG_X86_64) && defined(CONFIG_XEN_PV)
+extern const char xen_early_idt_handler_array[NUM_EXCEPTION_VECTORS][XEN_EARLY_IDT_HANDLER_SIZE];
+#endif
+
/*
* Load a segment. Fall back on loading the zero segment if something goes
* wrong. This variant assumes that loading zero fully clears the segment.
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index 509046cfa5ce..877b5c1a1b12 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -173,40 +173,43 @@ static inline void cr4_init_shadow(void)
this_cpu_write(cpu_tlbstate.cr4, __read_cr4());
}
+static inline void __cr4_set(unsigned long cr4)
+{
+ lockdep_assert_irqs_disabled();
+ this_cpu_write(cpu_tlbstate.cr4, cr4);
+ __write_cr4(cr4);
+}
+
/* Set in this cpu's CR4. */
static inline void cr4_set_bits(unsigned long mask)
{
- unsigned long cr4;
+ unsigned long cr4, flags;
+ local_irq_save(flags);
cr4 = this_cpu_read(cpu_tlbstate.cr4);
- if ((cr4 | mask) != cr4) {
- cr4 |= mask;
- this_cpu_write(cpu_tlbstate.cr4, cr4);
- __write_cr4(cr4);
- }
+ if ((cr4 | mask) != cr4)
+ __cr4_set(cr4 | mask);
+ local_irq_restore(flags);
}
/* Clear in this cpu's CR4. */
static inline void cr4_clear_bits(unsigned long mask)
{
- unsigned long cr4;
+ unsigned long cr4, flags;
+ local_irq_save(flags);
cr4 = this_cpu_read(cpu_tlbstate.cr4);
- if ((cr4 & ~mask) != cr4) {
- cr4 &= ~mask;
- this_cpu_write(cpu_tlbstate.cr4, cr4);
- __write_cr4(cr4);
- }
+ if ((cr4 & ~mask) != cr4)
+ __cr4_set(cr4 & ~mask);
+ local_irq_restore(flags);
}
-static inline void cr4_toggle_bits(unsigned long mask)
+static inline void cr4_toggle_bits_irqsoff(unsigned long mask)
{
unsigned long cr4;
cr4 = this_cpu_read(cpu_tlbstate.cr4);
- cr4 ^= mask;
- this_cpu_write(cpu_tlbstate.cr4, cr4);
- __write_cr4(cr4);
+ __cr4_set(cr4 ^ mask);
}
/* Read the CR4 shadow. */
diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index 6a823a25eaff..750449152b04 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -542,8 +542,8 @@ static int x86_vector_alloc_irqs(struct irq_domain *domain, unsigned int virq,
}
#ifdef CONFIG_GENERIC_IRQ_DEBUGFS
-void x86_vector_debug_show(struct seq_file *m, struct irq_domain *d,
- struct irq_data *irqd, int ind)
+static void x86_vector_debug_show(struct seq_file *m, struct irq_domain *d,
+ struct irq_data *irqd, int ind)
{
unsigned int cpu, vector, prev_cpu, prev_vector;
struct apic_chip_data *apicd;
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index d58184b7cd44..bcb75dc97d44 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -804,8 +804,11 @@ static void init_amd(struct cpuinfo_x86 *c)
case 0x17: init_amd_zn(c); break;
}
- /* Enable workaround for FXSAVE leak */
- if (c->x86 >= 6)
+ /*
+ * Enable workaround for FXSAVE leak on CPUs
+ * without a XSaveErPtr feature
+ */
+ if ((c->x86 >= 6) && (!cpu_has(c, X86_FEATURE_XSAVEERPTR)))
set_cpu_bug(c, X86_BUG_FXSAVE_LEAK);
cpu_detect_cache_sizes(c);
diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c
index c6daec4bdba5..330b8462d426 100644
--- a/arch/x86/kernel/cpu/microcode/amd.c
+++ b/arch/x86/kernel/cpu/microcode/amd.c
@@ -470,6 +470,7 @@ static unsigned int verify_patch_size(u8 family, u32 patch_size,
#define F14H_MPB_MAX_SIZE 1824
#define F15H_MPB_MAX_SIZE 4096
#define F16H_MPB_MAX_SIZE 3458
+#define F17H_MPB_MAX_SIZE 3200
switch (family) {
case 0x14:
@@ -481,6 +482,9 @@ static unsigned int verify_patch_size(u8 family, u32 patch_size,
case 0x16:
max_size = F16H_MPB_MAX_SIZE;
break;
+ case 0x17:
+ max_size = F17H_MPB_MAX_SIZE;
+ break;
default:
max_size = F1XH_MPB_MAX_SIZE;
break;
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 97fb3e5737f5..bb988a24db92 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -299,7 +299,7 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
}
if ((tifp ^ tifn) & _TIF_NOTSC)
- cr4_toggle_bits(X86_CR4_TSD);
+ cr4_toggle_bits_irqsoff(X86_CR4_TSD);
if ((tifp ^ tifn) & _TIF_NOCPUID)
set_cpuid_faulting(!!(tifn & _TIF_NOCPUID));
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 3d01df7d7cf6..05a97d5fe298 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -237,7 +237,7 @@ static void notrace start_secondary(void *unused)
load_cr3(swapper_pg_dir);
__flush_tlb_all();
#endif
-
+ load_current_idt();
cpu_init();
x86_cpuinit.early_percpu_clock_init();
preempt_disable();
diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c
index 3321b446b66c..88754bfd425f 100644
--- a/arch/x86/mm/extable.c
+++ b/arch/x86/mm/extable.c
@@ -1,6 +1,7 @@
#include <linux/extable.h>
#include <linux/uaccess.h>
#include <linux/sched/debug.h>
+#include <xen/xen.h>
#include <asm/fpu/internal.h>
#include <asm/traps.h>
@@ -212,8 +213,9 @@ void __init early_fixup_exception(struct pt_regs *regs, int trapnr)
* Old CPUs leave the high bits of CS on the stack
* undefined. I'm not sure which CPUs do this, but at least
* the 486 DX works this way.
+ * Xen pv domains are not using the default __KERNEL_CS.
*/
- if (regs->cs != __KERNEL_CS)
+ if (!xen_pv_domain() && regs->cs != __KERNEL_CS)
goto fail;
/*
diff --git a/arch/x86/pci/broadcom_bus.c b/arch/x86/pci/broadcom_bus.c
index bb461cfd01ab..526536c81ddc 100644
--- a/arch/x86/pci/broadcom_bus.c
+++ b/arch/x86/pci/broadcom_bus.c
@@ -97,7 +97,7 @@ static int __init broadcom_postcore_init(void)
* We should get host bridge information from ACPI unless the BIOS
* doesn't support it.
*/
- if (acpi_os_get_root_pointer())
+ if (!acpi_disabled && acpi_os_get_root_pointer())
return 0;
#endif
diff --git a/arch/x86/platform/uv/uv_nmi.c b/arch/x86/platform/uv/uv_nmi.c
index c34bd8233f7c..5f64f30873e2 100644
--- a/arch/x86/platform/uv/uv_nmi.c
+++ b/arch/x86/platform/uv/uv_nmi.c
@@ -905,7 +905,7 @@ static inline void uv_call_kgdb_kdb(int cpu, struct pt_regs *regs, int master)
/*
* UV NMI handler
*/
-int uv_handle_nmi(unsigned int reason, struct pt_regs *regs)
+static int uv_handle_nmi(unsigned int reason, struct pt_regs *regs)
{
struct uv_hub_nmi_s *hub_nmi = uv_hub_nmi;
int cpu = smp_processor_id();
@@ -1013,7 +1013,7 @@ void uv_nmi_init(void)
}
/* Setup HUB NMI info */
-void __init uv_nmi_setup_common(bool hubbed)
+static void __init uv_nmi_setup_common(bool hubbed)
{
int size = sizeof(void *) * (1 << NODES_SHIFT);
int cpu;
diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c
index 84fcfde53f8f..5191de14f4df 100644
--- a/arch/x86/power/cpu.c
+++ b/arch/x86/power/cpu.c
@@ -226,8 +226,20 @@ static void notrace __restore_processor_state(struct saved_context *ctxt)
load_idt((const struct desc_ptr *)&ctxt->idt_limit);
#endif
+#ifdef CONFIG_X86_64
/*
- * segment registers
+ * We need GSBASE restored before percpu access can work.
+ * percpu access can happen in exception handlers or in complicated
+ * helpers like load_gs_index().
+ */
+ wrmsrl(MSR_GS_BASE, ctxt->gs_base);
+#endif
+
+ fix_processor_context();
+
+ /*
+ * Restore segment registers. This happens after restoring the GDT
+ * and LDT, which happen in fix_processor_context().
*/
#ifdef CONFIG_X86_32
loadsegment(es, ctxt->es);
@@ -248,13 +260,14 @@ static void notrace __restore_processor_state(struct saved_context *ctxt)
load_gs_index(ctxt->gs);
asm volatile ("movw %0, %%ss" :: "r" (ctxt->ss));
+ /*
+ * Restore FSBASE and user GSBASE after reloading the respective
+ * segment selectors.
+ */
wrmsrl(MSR_FS_BASE, ctxt->fs_base);
- wrmsrl(MSR_GS_BASE, ctxt->gs_base);
wrmsrl(MSR_KERNEL_GS_BASE, ctxt->gs_kernel_base);
#endif
- fix_processor_context();
-
do_fpu_end();
tsc_verify_tsc_adjust(true);
x86_platform.restore_sched_clock_state();
diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index 5b2b3f3f6531..f2414c6c5e7c 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -622,7 +622,7 @@ static struct trap_array_entry trap_array[] = {
{ simd_coprocessor_error, xen_simd_coprocessor_error, false },
};
-static bool get_trap_addr(void **addr, unsigned int ist)
+static bool __ref get_trap_addr(void **addr, unsigned int ist)
{
unsigned int nr;
bool ist_okay = false;
@@ -644,6 +644,14 @@ static bool get_trap_addr(void **addr, unsigned int ist)
}
}
+ if (nr == ARRAY_SIZE(trap_array) &&
+ *addr >= (void *)early_idt_handler_array[0] &&
+ *addr < (void *)early_idt_handler_array[NUM_EXCEPTION_VECTORS]) {
+ nr = (*addr - (void *)early_idt_handler_array[0]) /
+ EARLY_IDT_HANDLER_SIZE;
+ *addr = (void *)xen_early_idt_handler_array[nr];
+ }
+
if (WARN_ON(ist != 0 && !ist_okay))
return false;
@@ -1262,6 +1270,21 @@ asmlinkage __visible void __init xen_start_kernel(void)
xen_setup_gdt(0);
xen_init_irq_ops();
+
+ /* Let's presume PV guests always boot on vCPU with id 0. */
+ per_cpu(xen_vcpu_id, 0) = 0;
+
+ /*
+ * Setup xen_vcpu early because idt_setup_early_handler needs it for
+ * local_irq_disable(), irqs_disabled().
+ *
+ * Don't do the full vcpu_info placement stuff until we have
+ * the cpu_possible_mask and a non-dummy shared_info.
+ */
+ xen_vcpu_info_reset(0);
+
+ idt_setup_early_handler();
+
xen_init_capabilities();
#ifdef CONFIG_X86_LOCAL_APIC
@@ -1295,18 +1318,6 @@ asmlinkage __visible void __init xen_start_kernel(void)
*/
acpi_numa = -1;
#endif
- /* Let's presume PV guests always boot on vCPU with id 0. */
- per_cpu(xen_vcpu_id, 0) = 0;
-
- /*
- * Setup xen_vcpu early because start_kernel needs it for
- * local_irq_disable(), irqs_disabled().
- *
- * Don't do the full vcpu_info placement stuff until we have
- * the cpu_possible_mask and a non-dummy shared_info.
- */
- xen_vcpu_info_reset(0);
-
WARN_ON(xen_cpuhp_setup(xen_cpu_up_prepare_pv, xen_cpu_dead_pv));
local_irq_disable();
diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S
index 8a10c9a9e2b5..417b339e5c8e 100644
--- a/arch/x86/xen/xen-asm_64.S
+++ b/arch/x86/xen/xen-asm_64.S
@@ -15,6 +15,7 @@
#include <xen/interface/xen.h>
+#include <linux/init.h>
#include <linux/linkage.h>
.macro xen_pv_trap name
@@ -54,6 +55,19 @@ xen_pv_trap entry_INT80_compat
#endif
xen_pv_trap hypervisor_callback
+ __INIT
+ENTRY(xen_early_idt_handler_array)
+ i = 0
+ .rept NUM_EXCEPTION_VECTORS
+ pop %rcx
+ pop %r11
+ jmp early_idt_handler_array + i*EARLY_IDT_HANDLER_SIZE
+ i = i + 1
+ .fill xen_early_idt_handler_array + i*XEN_EARLY_IDT_HANDLER_SIZE - ., 1, 0xcc
+ .endr
+END(xen_early_idt_handler_array)
+ __FINIT
+
hypercall_iret = hypercall_page + __HYPERVISOR_iret * 32
/*
* Xen64 iret frame:
^ permalink raw reply related [flat|nested] 547+ messages in thread
* [GIT PULL] x86 fixes
@ 2017-11-26 12:48 Ingo Molnar
0 siblings, 0 replies; 547+ messages in thread
From: Ingo Molnar @ 2017-11-26 12:48 UTC (permalink / raw)
To: Linus Torvalds
Cc: linux-kernel, Thomas Gleixner, H. Peter Anvin, Peter Zijlstra,
Andrew Morton, Andy Lutomirski
Linus,
Please pull the latest x86-urgent-for-linus git tree from:
git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus
# HEAD: 12a78d43de767eaf8fb272facb7a7b6f2dc6a9df x86/decoder: Add new TEST instruction pattern
Misc fixes:
- topology enumeration fixes
- KASAN fix
- two entry fixes (not yet the big series related to KASLR)
- remove obsolete code
- instruction decoder fix
- better /dev/mem sanity checks, hopefully working better this time
- pkeys fixes
- two ACPI fixes
- 5-level paging related fixes
- UMIP fixes that should make application visible faults more debuggable
- boot fix for weird virtualization environment
out-of-topic modifications in x86-urgent-for-linus:
-----------------------------------------------------
drivers/pci/Kconfig # fd2fa6c18b72: x86/PCI: Remove unused Hyper
drivers/pci/Makefile # fd2fa6c18b72: x86/PCI: Remove unused Hyper
drivers/pci/htirq.c # fd2fa6c18b72: x86/PCI: Remove unused Hyper
include/linux/htirq.h # fd2fa6c18b72: x86/PCI: Remove unused Hyper
include/linux/pci.h # fd2fa6c18b72: x86/PCI: Remove unused Hyper
tools/testing/selftests/x86/5lvl.c # 97f404ad3e53: x86/selftests: Add test for
tools/testing/selftests/x86/Makefile# 97f404ad3e53: x86/selftests: Add test for
tools/testing/selftests/x86/mpx-hw.h# a6400120d042: x86/mpx/selftests: Fix up we
tools/testing/selftests/x86/pkey-helpers.h# 7b659ee3e1fe: x86/pkeys/selftests: Fix pro
tools/testing/selftests/x86/protection_keys.c# 91c49c2deb96: x86/pkeys/selftests: Rename
Thanks,
Ingo
------------------>
Andi Kleen (2):
perf/x86/intel/uncore: Cache logical pkg id in uncore driver
x86/topology: Avoid wasting 128k for package id array
Andrey Ryabinin (1):
x86/mm/kasan: Don't use vmemmap_populate() to initialize shadow
Andy Lutomirski (2):
x86/entry/64: Fix entry_SYSCALL_64_after_hwframe() IRQ tracing
x86/entry/64: Add missing irqflags tracing to native_load_gs_index()
Bjorn Helgaas (1):
x86/PCI: Remove unused HyperTransport interrupt support
Borislav Petkov (1):
x86/umip: Fix insn_get_code_seg_params()'s return value
Chao Fan (1):
x86/boot/KASLR: Remove unused variable
Craig Bergstrom (1):
x86/mm: Limit mmap() of /dev/mem to valid physical addresses
Dave Hansen (4):
x86/pkeys: Update documentation about availability
x86/mpx/selftests: Fix up weird arrays
x86/pkeys/selftests: Rename 'si_pkey' to 'siginfo_pkey'
x86/pkeys/selftests: Fix protection keys write() warning
Kirill A. Shutemov (2):
x86/mm: Prevent non-MAP_FIXED mapping across DEFAULT_MAP_WINDOW border
x86/selftests: Add test for mapping placement for 5-level paging
Masami Hiramatsu (1):
x86/decoder: Add new TEST instruction pattern
Prarit Bhargava (1):
x86/smpboot: Fix __max_logical_packages estimate
Ricardo Neri (4):
x86/umip: Select X86_INTEL_UMIP by default
x86/umip: Print a line in the boot log that UMIP has been enabled
x86/umip: Identify the STR and SLDT instructions
x86/umip: Print a warning into the syslog if UMIP-protected instructions are used
Tom Lendacky (1):
x86/boot: Fix boot failure when SMP MP-table is based at 0
Vikas C Sajjan (2):
x86/acpi: Handle SCI interrupts above legacy space gracefully
x86/acpi: Reduce code duplication in mp_override_legacy_irq()
Documentation/x86/protection-keys.txt | 9 +-
arch/x86/Kconfig | 14 +-
arch/x86/boot/compressed/kaslr.c | 5 +-
arch/x86/entry/entry_64.S | 14 +-
arch/x86/events/intel/uncore.c | 4 +-
arch/x86/events/intel/uncore.h | 2 +-
arch/x86/events/intel/uncore_snbep.c | 2 +-
arch/x86/include/asm/elf.h | 1 +
arch/x86/include/asm/hw_irq.h | 8 --
arch/x86/include/asm/hypertransport.h | 46 ------
arch/x86/include/asm/insn-eval.h | 2 +-
arch/x86/include/asm/io.h | 4 +
arch/x86/include/asm/irqdomain.h | 6 -
arch/x86/include/asm/processor.h | 1 +
arch/x86/kernel/acpi/boot.c | 61 +++++---
arch/x86/kernel/apic/Makefile | 1 -
arch/x86/kernel/apic/htirq.c | 198 --------------------------
arch/x86/kernel/apic/vector.c | 5 +-
arch/x86/kernel/cpu/common.c | 2 +
arch/x86/kernel/mpparse.c | 6 +-
arch/x86/kernel/smpboot.c | 128 +++++------------
arch/x86/kernel/sys_x86_64.c | 10 +-
arch/x86/kernel/umip.c | 88 ++++++++++--
arch/x86/lib/insn-eval.c | 4 +-
arch/x86/lib/x86-opcode-map.txt | 2 +-
arch/x86/mm/hugetlbpage.c | 11 +-
arch/x86/mm/kasan_init_64.c | 143 ++++++++++++++++++-
arch/x86/mm/mmap.c | 62 ++++++++
drivers/char/mem.c | 4 +
drivers/pci/Kconfig | 9 --
drivers/pci/Makefile | 3 -
drivers/pci/htirq.c | 135 ------------------
include/linux/htirq.h | 39 -----
include/linux/pci.h | 6 -
tools/testing/selftests/x86/5lvl.c | 177 +++++++++++++++++++++++
tools/testing/selftests/x86/Makefile | 2 +-
tools/testing/selftests/x86/mpx-hw.h | 4 +-
tools/testing/selftests/x86/pkey-helpers.h | 5 +-
tools/testing/selftests/x86/protection_keys.c | 10 +-
39 files changed, 609 insertions(+), 624 deletions(-)
delete mode 100644 arch/x86/include/asm/hypertransport.h
delete mode 100644 arch/x86/kernel/apic/htirq.c
delete mode 100644 drivers/pci/htirq.c
delete mode 100644 include/linux/htirq.h
create mode 100644 tools/testing/selftests/x86/5lvl.c
diff --git a/Documentation/x86/protection-keys.txt b/Documentation/x86/protection-keys.txt
index fa46dcb347bc..ecb0d2dadfb7 100644
--- a/Documentation/x86/protection-keys.txt
+++ b/Documentation/x86/protection-keys.txt
@@ -1,5 +1,10 @@
-Memory Protection Keys for Userspace (PKU aka PKEYs) is a CPU feature
-which will be found on future Intel CPUs.
+Memory Protection Keys for Userspace (PKU aka PKEYs) is a feature
+which is found on Intel's Skylake "Scalable Processor" Server CPUs.
+It will be avalable in future non-server parts.
+
+For anyone wishing to test or use this feature, it is available in
+Amazon's EC2 C5 instances and is known to work there using an Ubuntu
+17.04 image.
Memory Protection Keys provides a mechanism for enforcing page-based
protections, but without requiring modification of the page tables
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index f08977d82ca0..09dcc94c4484 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -110,7 +110,7 @@ config X86
select HAVE_ARCH_AUDITSYSCALL
select HAVE_ARCH_HUGE_VMAP if X86_64 || X86_PAE
select HAVE_ARCH_JUMP_LABEL
- select HAVE_ARCH_KASAN if X86_64 && SPARSEMEM_VMEMMAP
+ select HAVE_ARCH_KASAN if X86_64
select HAVE_ARCH_KGDB
select HAVE_ARCH_KMEMCHECK
select HAVE_ARCH_MMAP_RND_BITS if MMU
@@ -1805,14 +1805,20 @@ config X86_SMAP
If unsure, say Y.
config X86_INTEL_UMIP
- def_bool n
+ def_bool y
depends on CPU_SUP_INTEL
prompt "Intel User Mode Instruction Prevention" if EXPERT
---help---
The User Mode Instruction Prevention (UMIP) is a security
feature in newer Intel processors. If enabled, a general
- protection fault is issued if the instructions SGDT, SLDT,
- SIDT, SMSW and STR are executed in user mode.
+ protection fault is issued if the SGDT, SLDT, SIDT, SMSW
+ or STR instructions are executed in user mode. These instructions
+ unnecessarily expose information about the hardware state.
+
+ The vast majority of applications do not use these instructions.
+ For the very few that do, software emulation is provided in
+ specific cases in protected and virtual-8086 modes. Emulated
+ results are dummy.
config X86_INTEL_MPX
prompt "Intel MPX (Memory Protection Extensions)"
diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c
index a63fbc25ce84..8199a6187251 100644
--- a/arch/x86/boot/compressed/kaslr.c
+++ b/arch/x86/boot/compressed/kaslr.c
@@ -171,7 +171,6 @@ parse_memmap(char *p, unsigned long long *start, unsigned long long *size)
static void mem_avoid_memmap(char *str)
{
static int i;
- int rc;
if (i >= MAX_MEMMAP_REGIONS)
return;
@@ -219,7 +218,7 @@ static int handle_mem_memmap(void)
return 0;
tmp_cmdline = malloc(len + 1);
- if (!tmp_cmdline )
+ if (!tmp_cmdline)
error("Failed to allocate space for tmp_cmdline");
memcpy(tmp_cmdline, args, len);
@@ -363,7 +362,7 @@ static void mem_avoid_init(unsigned long input, unsigned long input_size,
cmd_line |= boot_params->hdr.cmd_line_ptr;
/* Calculate size of cmd_line. */
ptr = (char *)(unsigned long)cmd_line;
- for (cmd_line_size = 0; ptr[cmd_line_size++]; )
+ for (cmd_line_size = 0; ptr[cmd_line_size++];)
;
mem_avoid[MEM_AVOID_CMDLINE].start = cmd_line;
mem_avoid[MEM_AVOID_CMDLINE].size = cmd_line_size;
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index a2b30ec69497..f81d50d7ceac 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -51,15 +51,19 @@ ENTRY(native_usergs_sysret64)
END(native_usergs_sysret64)
#endif /* CONFIG_PARAVIRT */
-.macro TRACE_IRQS_IRETQ
+.macro TRACE_IRQS_FLAGS flags:req
#ifdef CONFIG_TRACE_IRQFLAGS
- bt $9, EFLAGS(%rsp) /* interrupts off? */
+ bt $9, \flags /* interrupts off? */
jnc 1f
TRACE_IRQS_ON
1:
#endif
.endm
+.macro TRACE_IRQS_IRETQ
+ TRACE_IRQS_FLAGS EFLAGS(%rsp)
+.endm
+
/*
* When dynamic function tracer is enabled it will add a breakpoint
* to all locations that it is about to modify, sync CPUs, update
@@ -148,8 +152,6 @@ ENTRY(entry_SYSCALL_64)
movq %rsp, PER_CPU_VAR(rsp_scratch)
movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
- TRACE_IRQS_OFF
-
/* Construct struct pt_regs on stack */
pushq $__USER_DS /* pt_regs->ss */
pushq PER_CPU_VAR(rsp_scratch) /* pt_regs->sp */
@@ -170,6 +172,8 @@ GLOBAL(entry_SYSCALL_64_after_hwframe)
sub $(6*8), %rsp /* pt_regs->bp, bx, r12-15 not saved */
UNWIND_HINT_REGS extra=0
+ TRACE_IRQS_OFF
+
/*
* If we need to do entry work or if we guess we'll need to do
* exit work, go straight to the slow path.
@@ -943,11 +947,13 @@ ENTRY(native_load_gs_index)
FRAME_BEGIN
pushfq
DISABLE_INTERRUPTS(CLBR_ANY & ~CLBR_RDI)
+ TRACE_IRQS_OFF
SWAPGS
.Lgs_change:
movl %edi, %gs
2: ALTERNATIVE "", "mfence", X86_BUG_SWAPGS_FENCE
SWAPGS
+ TRACE_IRQS_FLAGS (%rsp)
popfq
FRAME_END
ret
diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c
index d45e06346f14..7874c980d569 100644
--- a/arch/x86/events/intel/uncore.c
+++ b/arch/x86/events/intel/uncore.c
@@ -975,10 +975,10 @@ static void uncore_pci_remove(struct pci_dev *pdev)
int i, phys_id, pkg;
phys_id = uncore_pcibus_to_physid(pdev->bus);
- pkg = topology_phys_to_logical_pkg(phys_id);
box = pci_get_drvdata(pdev);
if (!box) {
+ pkg = topology_phys_to_logical_pkg(phys_id);
for (i = 0; i < UNCORE_EXTRA_PCI_DEV_MAX; i++) {
if (uncore_extra_pci_dev[pkg].dev[i] == pdev) {
uncore_extra_pci_dev[pkg].dev[i] = NULL;
@@ -994,7 +994,7 @@ static void uncore_pci_remove(struct pci_dev *pdev)
return;
pci_set_drvdata(pdev, NULL);
- pmu->boxes[pkg] = NULL;
+ pmu->boxes[box->pkgid] = NULL;
if (atomic_dec_return(&pmu->activeboxes) == 0)
uncore_pmu_unregister(pmu);
uncore_box_exit(box);
diff --git a/arch/x86/events/intel/uncore.h b/arch/x86/events/intel/uncore.h
index 4364191e7c6b..414dc7e7c950 100644
--- a/arch/x86/events/intel/uncore.h
+++ b/arch/x86/events/intel/uncore.h
@@ -100,7 +100,7 @@ struct intel_uncore_extra_reg {
struct intel_uncore_box {
int pci_phys_id;
- int pkgid;
+ int pkgid; /* Logical package ID */
int n_active; /* number of active events */
int n_events;
int cpu; /* cpu to collect events */
diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c
index 95cb19f4e06f..de8f8625213c 100644
--- a/arch/x86/events/intel/uncore_snbep.c
+++ b/arch/x86/events/intel/uncore_snbep.c
@@ -1057,7 +1057,7 @@ static void snbep_qpi_enable_event(struct intel_uncore_box *box, struct perf_eve
if (reg1->idx != EXTRA_REG_NONE) {
int idx = box->pmu->pmu_idx + SNBEP_PCI_QPI_PORT0_FILTER;
- int pkg = topology_phys_to_logical_pkg(box->pci_phys_id);
+ int pkg = box->pkgid;
struct pci_dev *filter_pdev = uncore_extra_pci_dev[pkg].dev[idx];
if (filter_pdev) {
diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h
index 3a091cea36c5..0d157d2a1e2a 100644
--- a/arch/x86/include/asm/elf.h
+++ b/arch/x86/include/asm/elf.h
@@ -309,6 +309,7 @@ static inline int mmap_is_ia32(void)
extern unsigned long task_size_32bit(void);
extern unsigned long task_size_64bit(int full_addr_space);
extern unsigned long get_mmap_base(int is_legacy);
+extern bool mmap_address_hint_valid(unsigned long addr, unsigned long len);
#ifdef CONFIG_X86_32
diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
index b80e46733909..2851077b6051 100644
--- a/arch/x86/include/asm/hw_irq.h
+++ b/arch/x86/include/asm/hw_irq.h
@@ -99,14 +99,6 @@ struct irq_alloc_info {
void *dmar_data;
};
#endif
-#ifdef CONFIG_HT_IRQ
- struct {
- int ht_pos;
- int ht_idx;
- struct pci_dev *ht_dev;
- void *ht_update;
- };
-#endif
#ifdef CONFIG_X86_UV
struct {
int uv_limit;
diff --git a/arch/x86/include/asm/hypertransport.h b/arch/x86/include/asm/hypertransport.h
deleted file mode 100644
index 5d55df352879..000000000000
--- a/arch/x86/include/asm/hypertransport.h
+++ /dev/null
@@ -1,46 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_X86_HYPERTRANSPORT_H
-#define _ASM_X86_HYPERTRANSPORT_H
-
-/*
- * Constants for x86 Hypertransport Interrupts.
- */
-
-#define HT_IRQ_LOW_BASE 0xf8000000
-
-#define HT_IRQ_LOW_VECTOR_SHIFT 16
-#define HT_IRQ_LOW_VECTOR_MASK 0x00ff0000
-#define HT_IRQ_LOW_VECTOR(v) \
- (((v) << HT_IRQ_LOW_VECTOR_SHIFT) & HT_IRQ_LOW_VECTOR_MASK)
-
-#define HT_IRQ_LOW_DEST_ID_SHIFT 8
-#define HT_IRQ_LOW_DEST_ID_MASK 0x0000ff00
-#define HT_IRQ_LOW_DEST_ID(v) \
- (((v) << HT_IRQ_LOW_DEST_ID_SHIFT) & HT_IRQ_LOW_DEST_ID_MASK)
-
-#define HT_IRQ_LOW_DM_PHYSICAL 0x0000000
-#define HT_IRQ_LOW_DM_LOGICAL 0x0000040
-
-#define HT_IRQ_LOW_RQEOI_EDGE 0x0000000
-#define HT_IRQ_LOW_RQEOI_LEVEL 0x0000020
-
-
-#define HT_IRQ_LOW_MT_FIXED 0x0000000
-#define HT_IRQ_LOW_MT_ARBITRATED 0x0000004
-#define HT_IRQ_LOW_MT_SMI 0x0000008
-#define HT_IRQ_LOW_MT_NMI 0x000000c
-#define HT_IRQ_LOW_MT_INIT 0x0000010
-#define HT_IRQ_LOW_MT_STARTUP 0x0000014
-#define HT_IRQ_LOW_MT_EXTINT 0x0000018
-#define HT_IRQ_LOW_MT_LINT1 0x000008c
-#define HT_IRQ_LOW_MT_LINT0 0x0000098
-
-#define HT_IRQ_LOW_IRQ_MASKED 0x0000001
-
-
-#define HT_IRQ_HIGH_DEST_ID_SHIFT 0
-#define HT_IRQ_HIGH_DEST_ID_MASK 0x00ffffff
-#define HT_IRQ_HIGH_DEST_ID(v) \
- ((((v) >> 8) << HT_IRQ_HIGH_DEST_ID_SHIFT) & HT_IRQ_HIGH_DEST_ID_MASK)
-
-#endif /* _ASM_X86_HYPERTRANSPORT_H */
diff --git a/arch/x86/include/asm/insn-eval.h b/arch/x86/include/asm/insn-eval.h
index e1d3b4ce8a92..2b6ccf2c49f1 100644
--- a/arch/x86/include/asm/insn-eval.h
+++ b/arch/x86/include/asm/insn-eval.h
@@ -18,6 +18,6 @@
void __user *insn_get_addr_ref(struct insn *insn, struct pt_regs *regs);
int insn_get_modrm_rm_off(struct insn *insn, struct pt_regs *regs);
unsigned long insn_get_seg_base(struct pt_regs *regs, int seg_reg_idx);
-char insn_get_code_seg_params(struct pt_regs *regs);
+int insn_get_code_seg_params(struct pt_regs *regs);
#endif /* _ASM_X86_INSN_EVAL_H */
diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h
index 93ae8aee1780..95e948627fd0 100644
--- a/arch/x86/include/asm/io.h
+++ b/arch/x86/include/asm/io.h
@@ -111,6 +111,10 @@ build_mmio_write(__writeq, "q", unsigned long, "r", )
#endif
+#define ARCH_HAS_VALID_PHYS_ADDR_RANGE
+extern int valid_phys_addr_range(phys_addr_t addr, size_t size);
+extern int valid_mmap_phys_addr_range(unsigned long pfn, size_t size);
+
/**
* virt_to_phys - map virtual addresses to physical
* @address: address to remap
diff --git a/arch/x86/include/asm/irqdomain.h b/arch/x86/include/asm/irqdomain.h
index f695cc6b8e1f..139feef467f7 100644
--- a/arch/x86/include/asm/irqdomain.h
+++ b/arch/x86/include/asm/irqdomain.h
@@ -56,10 +56,4 @@ extern void arch_init_msi_domain(struct irq_domain *domain);
static inline void arch_init_msi_domain(struct irq_domain *domain) { }
#endif
-#ifdef CONFIG_HT_IRQ
-extern void arch_init_htirq_domain(struct irq_domain *domain);
-#else
-static inline void arch_init_htirq_domain(struct irq_domain *domain) { }
-#endif
-
#endif
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 2db7cf720b04..cc16fa882e3e 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -132,6 +132,7 @@ struct cpuinfo_x86 {
/* Index into per_cpu list: */
u16 cpu_index;
u32 microcode;
+ unsigned initialized : 1;
} __randomize_layout;
struct cpuid_regs {
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index ef9e02e614d0..f4c463df8b08 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -342,13 +342,12 @@ acpi_parse_lapic_nmi(struct acpi_subtable_header * header, const unsigned long e
#ifdef CONFIG_X86_IO_APIC
#define MP_ISA_BUS 0
+static int __init mp_register_ioapic_irq(u8 bus_irq, u8 polarity,
+ u8 trigger, u32 gsi);
+
static void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger,
u32 gsi)
{
- int ioapic;
- int pin;
- struct mpc_intsrc mp_irq;
-
/*
* Check bus_irq boundary.
*/
@@ -357,14 +356,6 @@ static void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger,
return;
}
- /*
- * Convert 'gsi' to 'ioapic.pin'.
- */
- ioapic = mp_find_ioapic(gsi);
- if (ioapic < 0)
- return;
- pin = mp_find_ioapic_pin(ioapic, gsi);
-
/*
* TBD: This check is for faulty timer entries, where the override
* erroneously sets the trigger to level, resulting in a HUGE
@@ -373,16 +364,8 @@ static void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger,
if ((bus_irq == 0) && (trigger == 3))
trigger = 1;
- mp_irq.type = MP_INTSRC;
- mp_irq.irqtype = mp_INT;
- mp_irq.irqflag = (trigger << 2) | polarity;
- mp_irq.srcbus = MP_ISA_BUS;
- mp_irq.srcbusirq = bus_irq; /* IRQ */
- mp_irq.dstapic = mpc_ioapic_id(ioapic); /* APIC ID */
- mp_irq.dstirq = pin; /* INTIN# */
-
- mp_save_irq(&mp_irq);
-
+ if (mp_register_ioapic_irq(bus_irq, polarity, trigger, gsi) < 0)
+ return;
/*
* Reset default identity mapping if gsi is also an legacy IRQ,
* otherwise there will be more than one entry with the same GSI
@@ -429,6 +412,34 @@ static int mp_config_acpi_gsi(struct device *dev, u32 gsi, int trigger,
return 0;
}
+static int __init mp_register_ioapic_irq(u8 bus_irq, u8 polarity,
+ u8 trigger, u32 gsi)
+{
+ struct mpc_intsrc mp_irq;
+ int ioapic, pin;
+
+ /* Convert 'gsi' to 'ioapic.pin'(INTIN#) */
+ ioapic = mp_find_ioapic(gsi);
+ if (ioapic < 0) {
+ pr_warn("Failed to find ioapic for gsi : %u\n", gsi);
+ return ioapic;
+ }
+
+ pin = mp_find_ioapic_pin(ioapic, gsi);
+
+ mp_irq.type = MP_INTSRC;
+ mp_irq.irqtype = mp_INT;
+ mp_irq.irqflag = (trigger << 2) | polarity;
+ mp_irq.srcbus = MP_ISA_BUS;
+ mp_irq.srcbusirq = bus_irq;
+ mp_irq.dstapic = mpc_ioapic_id(ioapic);
+ mp_irq.dstirq = pin;
+
+ mp_save_irq(&mp_irq);
+
+ return 0;
+}
+
static int __init
acpi_parse_ioapic(struct acpi_subtable_header * header, const unsigned long end)
{
@@ -473,7 +484,11 @@ static void __init acpi_sci_ioapic_setup(u8 bus_irq, u16 polarity, u16 trigger,
if (acpi_sci_flags & ACPI_MADT_POLARITY_MASK)
polarity = acpi_sci_flags & ACPI_MADT_POLARITY_MASK;
- mp_override_legacy_irq(bus_irq, polarity, trigger, gsi);
+ if (bus_irq < NR_IRQS_LEGACY)
+ mp_override_legacy_irq(bus_irq, polarity, trigger, gsi);
+ else
+ mp_register_ioapic_irq(bus_irq, polarity, trigger, gsi);
+
acpi_penalize_sci_irq(bus_irq, trigger, polarity);
/*
diff --git a/arch/x86/kernel/apic/Makefile b/arch/x86/kernel/apic/Makefile
index a9e08924927e..a6fcaf16cdbf 100644
--- a/arch/x86/kernel/apic/Makefile
+++ b/arch/x86/kernel/apic/Makefile
@@ -12,7 +12,6 @@ obj-y += hw_nmi.o
obj-$(CONFIG_X86_IO_APIC) += io_apic.o
obj-$(CONFIG_PCI_MSI) += msi.o
-obj-$(CONFIG_HT_IRQ) += htirq.o
obj-$(CONFIG_SMP) += ipi.o
ifeq ($(CONFIG_X86_64),y)
diff --git a/arch/x86/kernel/apic/htirq.c b/arch/x86/kernel/apic/htirq.c
deleted file mode 100644
index b07075dce8b7..000000000000
--- a/arch/x86/kernel/apic/htirq.c
+++ /dev/null
@@ -1,198 +0,0 @@
-/*
- * Support Hypertransport IRQ
- *
- * Copyright (C) 1997, 1998, 1999, 2000, 2009 Ingo Molnar, Hajnalka Szabo
- * Moved from arch/x86/kernel/apic/io_apic.c.
- * Jiang Liu <jiang.liu@linux.intel.com>
- * Add support of hierarchical irqdomain
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-#include <linux/mm.h>
-#include <linux/interrupt.h>
-#include <linux/init.h>
-#include <linux/device.h>
-#include <linux/pci.h>
-#include <linux/htirq.h>
-#include <asm/irqdomain.h>
-#include <asm/hw_irq.h>
-#include <asm/apic.h>
-#include <asm/hypertransport.h>
-
-static struct irq_domain *htirq_domain;
-
-/*
- * Hypertransport interrupt support
- */
-static int
-ht_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force)
-{
- struct irq_data *parent = data->parent_data;
- int ret;
-
- ret = parent->chip->irq_set_affinity(parent, mask, force);
- if (ret >= 0) {
- struct ht_irq_msg msg;
- struct irq_cfg *cfg = irqd_cfg(data);
-
- fetch_ht_irq_msg(data->irq, &msg);
- msg.address_lo &= ~(HT_IRQ_LOW_VECTOR_MASK |
- HT_IRQ_LOW_DEST_ID_MASK);
- msg.address_lo |= HT_IRQ_LOW_VECTOR(cfg->vector) |
- HT_IRQ_LOW_DEST_ID(cfg->dest_apicid);
- msg.address_hi &= ~(HT_IRQ_HIGH_DEST_ID_MASK);
- msg.address_hi |= HT_IRQ_HIGH_DEST_ID(cfg->dest_apicid);
- write_ht_irq_msg(data->irq, &msg);
- }
-
- return ret;
-}
-
-static struct irq_chip ht_irq_chip = {
- .name = "PCI-HT",
- .irq_mask = mask_ht_irq,
- .irq_unmask = unmask_ht_irq,
- .irq_ack = irq_chip_ack_parent,
- .irq_set_affinity = ht_set_affinity,
- .irq_retrigger = irq_chip_retrigger_hierarchy,
- .flags = IRQCHIP_SKIP_SET_WAKE,
-};
-
-static int htirq_domain_alloc(struct irq_domain *domain, unsigned int virq,
- unsigned int nr_irqs, void *arg)
-{
- struct ht_irq_cfg *ht_cfg;
- struct irq_alloc_info *info = arg;
- struct pci_dev *dev;
- irq_hw_number_t hwirq;
- int ret;
-
- if (nr_irqs > 1 || !info)
- return -EINVAL;
-
- dev = info->ht_dev;
- hwirq = (info->ht_idx & 0xFF) |
- PCI_DEVID(dev->bus->number, dev->devfn) << 8 |
- (pci_domain_nr(dev->bus) & 0xFFFFFFFF) << 24;
- if (irq_find_mapping(domain, hwirq) > 0)
- return -EEXIST;
-
- ht_cfg = kmalloc(sizeof(*ht_cfg), GFP_KERNEL);
- if (!ht_cfg)
- return -ENOMEM;
-
- ret = irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, info);
- if (ret < 0) {
- kfree(ht_cfg);
- return ret;
- }
-
- /* Initialize msg to a value that will never match the first write. */
- ht_cfg->msg.address_lo = 0xffffffff;
- ht_cfg->msg.address_hi = 0xffffffff;
- ht_cfg->dev = info->ht_dev;
- ht_cfg->update = info->ht_update;
- ht_cfg->pos = info->ht_pos;
- ht_cfg->idx = 0x10 + (info->ht_idx * 2);
- irq_domain_set_info(domain, virq, hwirq, &ht_irq_chip, ht_cfg,
- handle_edge_irq, ht_cfg, "edge");
-
- return 0;
-}
-
-static void htirq_domain_free(struct irq_domain *domain, unsigned int virq,
- unsigned int nr_irqs)
-{
- struct irq_data *irq_data = irq_domain_get_irq_data(domain, virq);
-
- BUG_ON(nr_irqs != 1);
- kfree(irq_data->chip_data);
- irq_domain_free_irqs_top(domain, virq, nr_irqs);
-}
-
-static int htirq_domain_activate(struct irq_domain *domain,
- struct irq_data *irq_data, bool early)
-{
- struct ht_irq_msg msg;
- struct irq_cfg *cfg = irqd_cfg(irq_data);
-
- msg.address_hi = HT_IRQ_HIGH_DEST_ID(cfg->dest_apicid);
- msg.address_lo =
- HT_IRQ_LOW_BASE |
- HT_IRQ_LOW_DEST_ID(cfg->dest_apicid) |
- HT_IRQ_LOW_VECTOR(cfg->vector) |
- ((apic->irq_dest_mode == 0) ?
- HT_IRQ_LOW_DM_PHYSICAL :
- HT_IRQ_LOW_DM_LOGICAL) |
- HT_IRQ_LOW_RQEOI_EDGE |
- ((apic->irq_delivery_mode != dest_LowestPrio) ?
- HT_IRQ_LOW_MT_FIXED :
- HT_IRQ_LOW_MT_ARBITRATED) |
- HT_IRQ_LOW_IRQ_MASKED;
- write_ht_irq_msg(irq_data->irq, &msg);
- return 0;
-}
-
-static void htirq_domain_deactivate(struct irq_domain *domain,
- struct irq_data *irq_data)
-{
- struct ht_irq_msg msg;
-
- memset(&msg, 0, sizeof(msg));
- write_ht_irq_msg(irq_data->irq, &msg);
-}
-
-static const struct irq_domain_ops htirq_domain_ops = {
- .alloc = htirq_domain_alloc,
- .free = htirq_domain_free,
- .activate = htirq_domain_activate,
- .deactivate = htirq_domain_deactivate,
-};
-
-void __init arch_init_htirq_domain(struct irq_domain *parent)
-{
- struct fwnode_handle *fn;
-
- if (disable_apic)
- return;
-
- fn = irq_domain_alloc_named_fwnode("PCI-HT");
- if (!fn)
- goto warn;
-
- htirq_domain = irq_domain_create_tree(fn, &htirq_domain_ops, NULL);
- irq_domain_free_fwnode(fn);
- if (!htirq_domain)
- goto warn;
-
- htirq_domain->parent = parent;
- return;
-
-warn:
- pr_warn("Failed to initialize irqdomain for HTIRQ.\n");
-}
-
-int arch_setup_ht_irq(int idx, int pos, struct pci_dev *dev,
- ht_irq_update_t *update)
-{
- struct irq_alloc_info info;
-
- if (!htirq_domain)
- return -ENOSYS;
-
- init_irq_alloc_info(&info, NULL);
- info.ht_idx = idx;
- info.ht_pos = pos;
- info.ht_dev = dev;
- info.ht_update = update;
-
- return irq_domain_alloc_irqs(htirq_domain, 1, dev_to_node(&dev->dev),
- &info);
-}
-
-void arch_teardown_ht_irq(unsigned int irq)
-{
- irq_domain_free_irqs(irq, 1);
-}
diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index 05c85e693a5d..6a823a25eaff 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -1,5 +1,5 @@
/*
- * Local APIC related interfaces to support IOAPIC, MSI, HT_IRQ etc.
+ * Local APIC related interfaces to support IOAPIC, MSI, etc.
*
* Copyright (C) 1997, 1998, 1999, 2000, 2009 Ingo Molnar, Hajnalka Szabo
* Moved from arch/x86/kernel/apic/io_apic.c.
@@ -601,7 +601,7 @@ int __init arch_probe_nr_irqs(void)
nr_irqs = NR_VECTORS * nr_cpu_ids;
nr = (gsi_top + nr_legacy_irqs()) + 8 * nr_cpu_ids;
-#if defined(CONFIG_PCI_MSI) || defined(CONFIG_HT_IRQ)
+#if defined(CONFIG_PCI_MSI)
/*
* for MSI and HT dyn irq
*/
@@ -663,7 +663,6 @@ int __init arch_early_irq_init(void)
irq_set_default_host(x86_vector_domain);
arch_init_msi_domain(x86_vector_domain);
- arch_init_htirq_domain(x86_vector_domain);
BUG_ON(!alloc_cpumask_var(&vector_searchmask, GFP_KERNEL));
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 13ae9e5eec2f..fa998ca8aa5a 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -341,6 +341,8 @@ static __always_inline void setup_umip(struct cpuinfo_x86 *c)
cr4_set_bits(X86_CR4_UMIP);
+ pr_info("x86/cpu: Activated the Intel User Mode Instruction Prevention (UMIP) CPU feature\n");
+
return;
out:
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index 410c5dadcee3..3a4b12809ab5 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -431,6 +431,7 @@ static inline void __init construct_default_ISA_mptable(int mpc_default_type)
}
static unsigned long mpf_base;
+static bool mpf_found;
static unsigned long __init get_mpc_size(unsigned long physptr)
{
@@ -504,7 +505,7 @@ void __init default_get_smp_config(unsigned int early)
if (!smp_found_config)
return;
- if (!mpf_base)
+ if (!mpf_found)
return;
if (acpi_lapic && early)
@@ -593,6 +594,7 @@ static int __init smp_scan_config(unsigned long base, unsigned long length)
smp_found_config = 1;
#endif
mpf_base = base;
+ mpf_found = true;
pr_info("found SMP MP-table at [mem %#010lx-%#010lx] mapped at [%p]\n",
base, base + sizeof(*mpf) - 1, mpf);
@@ -858,7 +860,7 @@ static int __init update_mp_table(void)
if (!enable_update_mptable)
return 0;
- if (!mpf_base)
+ if (!mpf_found)
return 0;
mpf = early_memremap(mpf_base, sizeof(*mpf));
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 5f59e6bee123..3d01df7d7cf6 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -101,9 +101,6 @@ DEFINE_PER_CPU_READ_MOSTLY(struct cpuinfo_x86, cpu_info);
EXPORT_PER_CPU_SYMBOL(cpu_info);
/* Logical package management. We might want to allocate that dynamically */
-static int *physical_to_logical_pkg __read_mostly;
-static unsigned long *physical_package_map __read_mostly;;
-static unsigned int max_physical_pkg_id __read_mostly;
unsigned int __max_logical_packages __read_mostly;
EXPORT_SYMBOL(__max_logical_packages);
static unsigned int logical_packages __read_mostly;
@@ -280,6 +277,25 @@ static void notrace start_secondary(void *unused)
cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
}
+/**
+ * topology_phys_to_logical_pkg - Map a physical package id to a logical
+ *
+ * Returns logical package id or -1 if not found
+ */
+int topology_phys_to_logical_pkg(unsigned int phys_pkg)
+{
+ int cpu;
+
+ for_each_possible_cpu(cpu) {
+ struct cpuinfo_x86 *c = &cpu_data(cpu);
+
+ if (c->initialized && c->phys_proc_id == phys_pkg)
+ return c->logical_proc_id;
+ }
+ return -1;
+}
+EXPORT_SYMBOL(topology_phys_to_logical_pkg);
+
/**
* topology_update_package_map - Update the physical to logical package map
* @pkg: The physical package id as retrieved via CPUID
@@ -287,102 +303,23 @@ static void notrace start_secondary(void *unused)
*/
int topology_update_package_map(unsigned int pkg, unsigned int cpu)
{
- unsigned int new;
+ int new;
- /* Called from early boot ? */
- if (!physical_package_map)
- return 0;
-
- if (pkg >= max_physical_pkg_id)
- return -EINVAL;
-
- /* Set the logical package id */
- if (test_and_set_bit(pkg, physical_package_map))
+ /* Already available somewhere? */
+ new = topology_phys_to_logical_pkg(pkg);
+ if (new >= 0)
goto found;
- if (logical_packages >= __max_logical_packages) {
- pr_warn("Package %u of CPU %u exceeds BIOS package data %u.\n",
- logical_packages, cpu, __max_logical_packages);
- return -ENOSPC;
- }
-
new = logical_packages++;
if (new != pkg) {
pr_info("CPU %u Converting physical %u to logical package %u\n",
cpu, pkg, new);
}
- physical_to_logical_pkg[pkg] = new;
-
found:
- cpu_data(cpu).logical_proc_id = physical_to_logical_pkg[pkg];
+ cpu_data(cpu).logical_proc_id = new;
return 0;
}
-/**
- * topology_phys_to_logical_pkg - Map a physical package id to a logical
- *
- * Returns logical package id or -1 if not found
- */
-int topology_phys_to_logical_pkg(unsigned int phys_pkg)
-{
- if (phys_pkg >= max_physical_pkg_id)
- return -1;
- return physical_to_logical_pkg[phys_pkg];
-}
-EXPORT_SYMBOL(topology_phys_to_logical_pkg);
-
-static void __init smp_init_package_map(struct cpuinfo_x86 *c, unsigned int cpu)
-{
- unsigned int ncpus;
- size_t size;
-
- /*
- * Today neither Intel nor AMD support heterogenous systems. That
- * might change in the future....
- *
- * While ideally we'd want '* smp_num_siblings' in the below @ncpus
- * computation, this won't actually work since some Intel BIOSes
- * report inconsistent HT data when they disable HT.
- *
- * In particular, they reduce the APIC-IDs to only include the cores,
- * but leave the CPUID topology to say there are (2) siblings.
- * This means we don't know how many threads there will be until
- * after the APIC enumeration.
- *
- * By not including this we'll sometimes over-estimate the number of
- * logical packages by the amount of !present siblings, but this is
- * still better than MAX_LOCAL_APIC.
- *
- * We use total_cpus not nr_cpu_ids because nr_cpu_ids can be limited
- * on the command line leading to a similar issue as the HT disable
- * problem because the hyperthreads are usually enumerated after the
- * primary cores.
- */
- ncpus = boot_cpu_data.x86_max_cores;
- if (!ncpus) {
- pr_warn("x86_max_cores == zero !?!?");
- ncpus = 1;
- }
-
- __max_logical_packages = DIV_ROUND_UP(total_cpus, ncpus);
- logical_packages = 0;
-
- /*
- * Possibly larger than what we need as the number of apic ids per
- * package can be smaller than the actual used apic ids.
- */
- max_physical_pkg_id = DIV_ROUND_UP(MAX_LOCAL_APIC, ncpus);
- size = max_physical_pkg_id * sizeof(unsigned int);
- physical_to_logical_pkg = kmalloc(size, GFP_KERNEL);
- memset(physical_to_logical_pkg, 0xff, size);
- size = BITS_TO_LONGS(max_physical_pkg_id) * sizeof(unsigned long);
- physical_package_map = kzalloc(size, GFP_KERNEL);
-
- pr_info("Max logical packages: %u\n", __max_logical_packages);
-
- topology_update_package_map(c->phys_proc_id, cpu);
-}
-
void __init smp_store_boot_cpu_info(void)
{
int id = 0; /* CPU 0 */
@@ -390,7 +327,8 @@ void __init smp_store_boot_cpu_info(void)
*c = boot_cpu_data;
c->cpu_index = id;
- smp_init_package_map(c, id);
+ topology_update_package_map(c->phys_proc_id, id);
+ c->initialized = true;
}
/*
@@ -401,13 +339,16 @@ void smp_store_cpu_info(int id)
{
struct cpuinfo_x86 *c = &cpu_data(id);
- *c = boot_cpu_data;
+ /* Copy boot_cpu_data only on the first bringup */
+ if (!c->initialized)
+ *c = boot_cpu_data;
c->cpu_index = id;
/*
* During boot time, CPU0 has this setup already. Save the info when
* bringing up AP or offlined CPU0.
*/
identify_secondary_cpu(c);
+ c->initialized = true;
}
static bool
@@ -1356,7 +1297,16 @@ void __init native_smp_prepare_boot_cpu(void)
void __init native_smp_cpus_done(unsigned int max_cpus)
{
+ int ncpus;
+
pr_debug("Boot done\n");
+ /*
+ * Today neither Intel nor AMD support heterogenous systems so
+ * extrapolate the boot cpu's data to all packages.
+ */
+ ncpus = cpu_data(0).booted_cores * smp_num_siblings;
+ __max_logical_packages = DIV_ROUND_UP(nr_cpu_ids, ncpus);
+ pr_info("Max logical packages: %u\n", __max_logical_packages);
if (x86_has_numa_in_package)
set_sched_topology(x86_numa_in_package_topology);
diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c
index a63fe77b3217..676774b9bb8d 100644
--- a/arch/x86/kernel/sys_x86_64.c
+++ b/arch/x86/kernel/sys_x86_64.c
@@ -188,6 +188,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
if (len > TASK_SIZE)
return -ENOMEM;
+ /* No address checking. See comment at mmap_address_hint_valid() */
if (flags & MAP_FIXED)
return addr;
@@ -197,12 +198,15 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
/* requesting a specific address */
if (addr) {
- addr = PAGE_ALIGN(addr);
+ addr &= PAGE_MASK;
+ if (!mmap_address_hint_valid(addr, len))
+ goto get_unmapped_area;
+
vma = find_vma(mm, addr);
- if (TASK_SIZE - len >= addr &&
- (!vma || addr + len <= vm_start_gap(vma)))
+ if (!vma || addr + len <= vm_start_gap(vma))
return addr;
}
+get_unmapped_area:
info.flags = VM_UNMAPPED_AREA_TOPDOWN;
info.length = len;
diff --git a/arch/x86/kernel/umip.c b/arch/x86/kernel/umip.c
index 6ba82be68cff..f44ce0fb3583 100644
--- a/arch/x86/kernel/umip.c
+++ b/arch/x86/kernel/umip.c
@@ -78,7 +78,60 @@
#define UMIP_INST_SGDT 0 /* 0F 01 /0 */
#define UMIP_INST_SIDT 1 /* 0F 01 /1 */
-#define UMIP_INST_SMSW 3 /* 0F 01 /4 */
+#define UMIP_INST_SMSW 2 /* 0F 01 /4 */
+#define UMIP_INST_SLDT 3 /* 0F 00 /0 */
+#define UMIP_INST_STR 4 /* 0F 00 /1 */
+
+const char * const umip_insns[5] = {
+ [UMIP_INST_SGDT] = "SGDT",
+ [UMIP_INST_SIDT] = "SIDT",
+ [UMIP_INST_SMSW] = "SMSW",
+ [UMIP_INST_SLDT] = "SLDT",
+ [UMIP_INST_STR] = "STR",
+};
+
+#define umip_pr_err(regs, fmt, ...) \
+ umip_printk(regs, KERN_ERR, fmt, ##__VA_ARGS__)
+#define umip_pr_warning(regs, fmt, ...) \
+ umip_printk(regs, KERN_WARNING, fmt, ##__VA_ARGS__)
+
+/**
+ * umip_printk() - Print a rate-limited message
+ * @regs: Register set with the context in which the warning is printed
+ * @log_level: Kernel log level to print the message
+ * @fmt: The text string to print
+ *
+ * Print the text contained in @fmt. The print rate is limited to bursts of 5
+ * messages every two minutes. The purpose of this customized version of
+ * printk() is to print messages when user space processes use any of the
+ * UMIP-protected instructions. Thus, the printed text is prepended with the
+ * task name and process ID number of the current task as well as the
+ * instruction and stack pointers in @regs as seen when entering kernel mode.
+ *
+ * Returns:
+ *
+ * None.
+ */
+static __printf(3, 4)
+void umip_printk(const struct pt_regs *regs, const char *log_level,
+ const char *fmt, ...)
+{
+ /* Bursts of 5 messages every two minutes */
+ static DEFINE_RATELIMIT_STATE(ratelimit, 2 * 60 * HZ, 5);
+ struct task_struct *tsk = current;
+ struct va_format vaf;
+ va_list args;
+
+ if (!__ratelimit(&ratelimit))
+ return;
+
+ va_start(args, fmt);
+ vaf.fmt = fmt;
+ vaf.va = &args;
+ printk("%s" pr_fmt("%s[%d] ip:%lx sp:%lx: %pV"), log_level, tsk->comm,
+ task_pid_nr(tsk), regs->ip, regs->sp, &vaf);
+ va_end(args);
+}
/**
* identify_insn() - Identify a UMIP-protected instruction
@@ -118,10 +171,16 @@ static int identify_insn(struct insn *insn)
default:
return -EINVAL;
}
+ } else if (insn->opcode.bytes[1] == 0x0) {
+ if (X86_MODRM_REG(insn->modrm.value) == 0)
+ return UMIP_INST_SLDT;
+ else if (X86_MODRM_REG(insn->modrm.value) == 1)
+ return UMIP_INST_STR;
+ else
+ return -EINVAL;
+ } else {
+ return -EINVAL;
}
-
- /* SLDT AND STR are not emulated */
- return -EINVAL;
}
/**
@@ -228,10 +287,8 @@ static void force_sig_info_umip_fault(void __user *addr, struct pt_regs *regs)
if (!(show_unhandled_signals && unhandled_signal(tsk, SIGSEGV)))
return;
- pr_err_ratelimited("%s[%d] umip emulation segfault ip:%lx sp:%lx error:%x in %lx\n",
- tsk->comm, task_pid_nr(tsk), regs->ip,
- regs->sp, X86_PF_USER | X86_PF_WRITE,
- regs->ip);
+ umip_pr_err(regs, "segfault in emulation. error%x\n",
+ X86_PF_USER | X86_PF_WRITE);
}
/**
@@ -262,15 +319,11 @@ bool fixup_umip_exception(struct pt_regs *regs)
unsigned char buf[MAX_INSN_SIZE];
void __user *uaddr;
struct insn insn;
- char seg_defs;
+ int seg_defs;
if (!regs)
return false;
- /* Do not emulate 64-bit processes. */
- if (user_64bit_mode(regs))
- return false;
-
/*
* If not in user-space long mode, a custom code segment could be in
* use. This is true in protected mode (if the process defined a local
@@ -322,6 +375,15 @@ bool fixup_umip_exception(struct pt_regs *regs)
if (umip_inst < 0)
return false;
+ umip_pr_warning(regs, "%s instruction cannot be used by applications.\n",
+ umip_insns[umip_inst]);
+
+ /* Do not emulate SLDT, STR or user long mode processes. */
+ if (umip_inst == UMIP_INST_STR || umip_inst == UMIP_INST_SLDT || user_64bit_mode(regs))
+ return false;
+
+ umip_pr_warning(regs, "For now, expensive software emulation returns the result.\n");
+
if (emulate_umip_insn(&insn, umip_inst, dummy_data, &dummy_data_size))
return false;
diff --git a/arch/x86/lib/insn-eval.c b/arch/x86/lib/insn-eval.c
index 35625d279458..9119d8e41f1f 100644
--- a/arch/x86/lib/insn-eval.c
+++ b/arch/x86/lib/insn-eval.c
@@ -733,11 +733,11 @@ static unsigned long get_seg_limit(struct pt_regs *regs, int seg_reg_idx)
*
* Returns:
*
- * A signed 8-bit value containing the default parameters on success.
+ * An int containing ORed-in default parameters on success.
*
* -EINVAL on error.
*/
-char insn_get_code_seg_params(struct pt_regs *regs)
+int insn_get_code_seg_params(struct pt_regs *regs)
{
struct desc_struct *desc;
short sel;
diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt
index 12e377184ee4..c4d55919fac1 100644
--- a/arch/x86/lib/x86-opcode-map.txt
+++ b/arch/x86/lib/x86-opcode-map.txt
@@ -896,7 +896,7 @@ EndTable
GrpTable: Grp3_1
0: TEST Eb,Ib
-1:
+1: TEST Eb,Ib
2: NOT Eb
3: NEG Eb
4: MUL AL,Eb
diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c
index 8ae0000cbdb3..00b296617ca4 100644
--- a/arch/x86/mm/hugetlbpage.c
+++ b/arch/x86/mm/hugetlbpage.c
@@ -158,6 +158,7 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
if (len > TASK_SIZE)
return -ENOMEM;
+ /* No address checking. See comment at mmap_address_hint_valid() */
if (flags & MAP_FIXED) {
if (prepare_hugepage_range(file, addr, len))
return -EINVAL;
@@ -165,12 +166,16 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
}
if (addr) {
- addr = ALIGN(addr, huge_page_size(h));
+ addr &= huge_page_mask(h);
+ if (!mmap_address_hint_valid(addr, len))
+ goto get_unmapped_area;
+
vma = find_vma(mm, addr);
- if (TASK_SIZE - len >= addr &&
- (!vma || addr + len <= vm_start_gap(vma)))
+ if (!vma || addr + len <= vm_start_gap(vma))
return addr;
}
+
+get_unmapped_area:
if (mm->get_unmapped_area == arch_get_unmapped_area)
return hugetlb_get_unmapped_area_bottomup(file, addr, len,
pgoff, flags);
diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c
index 2b60dc6e64b1..99dfed6dfef8 100644
--- a/arch/x86/mm/kasan_init_64.c
+++ b/arch/x86/mm/kasan_init_64.c
@@ -4,12 +4,14 @@
#include <linux/bootmem.h>
#include <linux/kasan.h>
#include <linux/kdebug.h>
+#include <linux/memblock.h>
#include <linux/mm.h>
#include <linux/sched.h>
#include <linux/sched/task.h>
#include <linux/vmalloc.h>
#include <asm/e820/types.h>
+#include <asm/pgalloc.h>
#include <asm/tlbflush.h>
#include <asm/sections.h>
#include <asm/pgtable.h>
@@ -18,7 +20,134 @@ extern struct range pfn_mapped[E820_MAX_ENTRIES];
static p4d_t tmp_p4d_table[PTRS_PER_P4D] __initdata __aligned(PAGE_SIZE);
-static int __init map_range(struct range *range)
+static __init void *early_alloc(size_t size, int nid)
+{
+ return memblock_virt_alloc_try_nid_nopanic(size, size,
+ __pa(MAX_DMA_ADDRESS), BOOTMEM_ALLOC_ACCESSIBLE, nid);
+}
+
+static void __init kasan_populate_pmd(pmd_t *pmd, unsigned long addr,
+ unsigned long end, int nid)
+{
+ pte_t *pte;
+
+ if (pmd_none(*pmd)) {
+ void *p;
+
+ if (boot_cpu_has(X86_FEATURE_PSE) &&
+ ((end - addr) == PMD_SIZE) &&
+ IS_ALIGNED(addr, PMD_SIZE)) {
+ p = early_alloc(PMD_SIZE, nid);
+ if (p && pmd_set_huge(pmd, __pa(p), PAGE_KERNEL))
+ return;
+ else if (p)
+ memblock_free(__pa(p), PMD_SIZE);
+ }
+
+ p = early_alloc(PAGE_SIZE, nid);
+ pmd_populate_kernel(&init_mm, pmd, p);
+ }
+
+ pte = pte_offset_kernel(pmd, addr);
+ do {
+ pte_t entry;
+ void *p;
+
+ if (!pte_none(*pte))
+ continue;
+
+ p = early_alloc(PAGE_SIZE, nid);
+ entry = pfn_pte(PFN_DOWN(__pa(p)), PAGE_KERNEL);
+ set_pte_at(&init_mm, addr, pte, entry);
+ } while (pte++, addr += PAGE_SIZE, addr != end);
+}
+
+static void __init kasan_populate_pud(pud_t *pud, unsigned long addr,
+ unsigned long end, int nid)
+{
+ pmd_t *pmd;
+ unsigned long next;
+
+ if (pud_none(*pud)) {
+ void *p;
+
+ if (boot_cpu_has(X86_FEATURE_GBPAGES) &&
+ ((end - addr) == PUD_SIZE) &&
+ IS_ALIGNED(addr, PUD_SIZE)) {
+ p = early_alloc(PUD_SIZE, nid);
+ if (p && pud_set_huge(pud, __pa(p), PAGE_KERNEL))
+ return;
+ else if (p)
+ memblock_free(__pa(p), PUD_SIZE);
+ }
+
+ p = early_alloc(PAGE_SIZE, nid);
+ pud_populate(&init_mm, pud, p);
+ }
+
+ pmd = pmd_offset(pud, addr);
+ do {
+ next = pmd_addr_end(addr, end);
+ if (!pmd_large(*pmd))
+ kasan_populate_pmd(pmd, addr, next, nid);
+ } while (pmd++, addr = next, addr != end);
+}
+
+static void __init kasan_populate_p4d(p4d_t *p4d, unsigned long addr,
+ unsigned long end, int nid)
+{
+ pud_t *pud;
+ unsigned long next;
+
+ if (p4d_none(*p4d)) {
+ void *p = early_alloc(PAGE_SIZE, nid);
+
+ p4d_populate(&init_mm, p4d, p);
+ }
+
+ pud = pud_offset(p4d, addr);
+ do {
+ next = pud_addr_end(addr, end);
+ if (!pud_large(*pud))
+ kasan_populate_pud(pud, addr, next, nid);
+ } while (pud++, addr = next, addr != end);
+}
+
+static void __init kasan_populate_pgd(pgd_t *pgd, unsigned long addr,
+ unsigned long end, int nid)
+{
+ void *p;
+ p4d_t *p4d;
+ unsigned long next;
+
+ if (pgd_none(*pgd)) {
+ p = early_alloc(PAGE_SIZE, nid);
+ pgd_populate(&init_mm, pgd, p);
+ }
+
+ p4d = p4d_offset(pgd, addr);
+ do {
+ next = p4d_addr_end(addr, end);
+ kasan_populate_p4d(p4d, addr, next, nid);
+ } while (p4d++, addr = next, addr != end);
+}
+
+static void __init kasan_populate_shadow(unsigned long addr, unsigned long end,
+ int nid)
+{
+ pgd_t *pgd;
+ unsigned long next;
+
+ addr = addr & PAGE_MASK;
+ end = round_up(end, PAGE_SIZE);
+ pgd = pgd_offset_k(addr);
+ do {
+ next = pgd_addr_end(addr, end);
+ kasan_populate_pgd(pgd, addr, next, nid);
+ } while (pgd++, addr = next, addr != end);
+}
+
+static void __init map_range(struct range *range)
{
unsigned long start;
unsigned long end;
@@ -26,7 +155,7 @@ static int __init map_range(struct range *range)
start = (unsigned long)kasan_mem_to_shadow(pfn_to_kaddr(range->start));
end = (unsigned long)kasan_mem_to_shadow(pfn_to_kaddr(range->end));
- return vmemmap_populate(start, end, NUMA_NO_NODE);
+ kasan_populate_shadow(start, end, early_pfn_to_nid(range->start));
}
static void __init clear_pgds(unsigned long start,
@@ -189,16 +318,16 @@ void __init kasan_init(void)
if (pfn_mapped[i].end == 0)
break;
- if (map_range(&pfn_mapped[i]))
- panic("kasan: unable to allocate shadow!");
+ map_range(&pfn_mapped[i]);
}
+
kasan_populate_zero_shadow(
kasan_mem_to_shadow((void *)PAGE_OFFSET + MAXMEM),
kasan_mem_to_shadow((void *)__START_KERNEL_map));
- vmemmap_populate((unsigned long)kasan_mem_to_shadow(_stext),
- (unsigned long)kasan_mem_to_shadow(_end),
- NUMA_NO_NODE);
+ kasan_populate_shadow((unsigned long)kasan_mem_to_shadow(_stext),
+ (unsigned long)kasan_mem_to_shadow(_end),
+ early_pfn_to_nid(__pa(_stext)));
kasan_populate_zero_shadow(kasan_mem_to_shadow((void *)MODULES_END),
(void *)KASAN_SHADOW_END);
diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c
index a99679826846..155ecbac9e28 100644
--- a/arch/x86/mm/mmap.c
+++ b/arch/x86/mm/mmap.c
@@ -33,6 +33,8 @@
#include <linux/compat.h>
#include <asm/elf.h>
+#include "physaddr.h"
+
struct va_alignment __read_mostly va_align = {
.flags = -1,
};
@@ -174,3 +176,63 @@ const char *arch_vma_name(struct vm_area_struct *vma)
return "[mpx]";
return NULL;
}
+
+/**
+ * mmap_address_hint_valid - Validate the address hint of mmap
+ * @addr: Address hint
+ * @len: Mapping length
+ *
+ * Check whether @addr and @addr + @len result in a valid mapping.
+ *
+ * On 32bit this only checks whether @addr + @len is <= TASK_SIZE.
+ *
+ * On 64bit with 5-level page tables another sanity check is required
+ * because mappings requested by mmap(@addr, 0) which cross the 47-bit
+ * virtual address boundary can cause the following theoretical issue:
+ *
+ * An application calls mmap(addr, 0), i.e. without MAP_FIXED, where @addr
+ * is below the border of the 47-bit address space and @addr + @len is
+ * above the border.
+ *
+ * With 4-level paging this request succeeds, but the resulting mapping
+ * address will always be within the 47-bit virtual address space, because
+ * the hint address does not result in a valid mapping and is
+ * ignored. Hence applications which are not prepared to handle virtual
+ * addresses above 47-bit work correctly.
+ *
+ * With 5-level paging this request would be granted and result in a
+ * mapping which crosses the border of the 47-bit virtual address
+ * space. If the application cannot handle addresses above 47-bit this
+ * will lead to misbehaviour and hard to diagnose failures.
+ *
+ * Therefore ignore address hints which would result in a mapping crossing
+ * the 47-bit virtual address boundary.
+ *
+ * Note, that in the same scenario with MAP_FIXED the behaviour is
+ * different. The request with @addr < 47-bit and @addr + @len > 47-bit
+ * fails on a 4-level paging machine but succeeds on a 5-level paging
+ * machine. It is reasonable to expect that an application does not rely on
+ * the failure of such a fixed mapping request, so the restriction is not
+ * applied.
+ */
+bool mmap_address_hint_valid(unsigned long addr, unsigned long len)
+{
+ if (TASK_SIZE - len < addr)
+ return false;
+
+ return (addr > DEFAULT_MAP_WINDOW) == (addr + len > DEFAULT_MAP_WINDOW);
+}
+
+/* Can we access it for direct reading/writing? Must be RAM: */
+int valid_phys_addr_range(phys_addr_t addr, size_t count)
+{
+ return addr + count <= __pa(high_memory);
+}
+
+/* Can we access it through mmap? Must be a valid physical address: */
+int valid_mmap_phys_addr_range(unsigned long pfn, size_t count)
+{
+ phys_addr_t addr = (phys_addr_t)pfn << PAGE_SHIFT;
+
+ return phys_addr_valid(addr + count - 1);
+}
diff --git a/drivers/char/mem.c b/drivers/char/mem.c
index 970e1242a282..6aefe5370e5b 100644
--- a/drivers/char/mem.c
+++ b/drivers/char/mem.c
@@ -343,6 +343,10 @@ static int mmap_mem(struct file *file, struct vm_area_struct *vma)
size_t size = vma->vm_end - vma->vm_start;
phys_addr_t offset = (phys_addr_t)vma->vm_pgoff << PAGE_SHIFT;
+ /* Does it even fit in phys_addr_t? */
+ if (offset >> PAGE_SHIFT != vma->vm_pgoff)
+ return -EINVAL;
+
/* It's illegal to wrap around the end of the physical address space. */
if (offset + (phys_addr_t)size - 1 < offset)
return -EINVAL;
diff --git a/drivers/pci/Kconfig b/drivers/pci/Kconfig
index c32a77fc8b03..99ae5e30eabe 100644
--- a/drivers/pci/Kconfig
+++ b/drivers/pci/Kconfig
@@ -71,15 +71,6 @@ config XEN_PCIDEV_FRONTEND
The PCI device frontend driver allows the kernel to import arbitrary
PCI devices from a PCI backend to support PCI driver domains.
-config HT_IRQ
- bool "Interrupts on hypertransport devices"
- default y
- depends on PCI && X86_LOCAL_APIC
- help
- This allows native hypertransport devices to use interrupts.
-
- If unsure say Y.
-
config PCI_ATS
bool
diff --git a/drivers/pci/Makefile b/drivers/pci/Makefile
index 80adbdbcecce..ab0104e0ffac 100644
--- a/drivers/pci/Makefile
+++ b/drivers/pci/Makefile
@@ -24,9 +24,6 @@ endif
# Build the PCI MSI interrupt support
obj-$(CONFIG_PCI_MSI) += msi.o
-# Build the Hypertransport interrupt support
-obj-$(CONFIG_HT_IRQ) += htirq.o
-
obj-$(CONFIG_PCI_ATS) += ats.o
obj-$(CONFIG_PCI_IOV) += iov.o
diff --git a/drivers/pci/htirq.c b/drivers/pci/htirq.c
deleted file mode 100644
index bb88c26f5144..000000000000
--- a/drivers/pci/htirq.c
+++ /dev/null
@@ -1,135 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * File: htirq.c
- * Purpose: Hypertransport Interrupt Capability
- *
- * Copyright (C) 2006 Linux Networx
- * Copyright (C) Eric Biederman <ebiederman@lnxi.com>
- */
-
-#include <linux/irq.h>
-#include <linux/pci.h>
-#include <linux/spinlock.h>
-#include <linux/export.h>
-#include <linux/slab.h>
-#include <linux/htirq.h>
-
-/* Global ht irq lock.
- *
- * This is needed to serialize access to the data port in hypertransport
- * irq capability.
- *
- * With multiple simultaneous hypertransport irq devices it might pay
- * to make this more fine grained. But start with simple, stupid, and correct.
- */
-static DEFINE_SPINLOCK(ht_irq_lock);
-
-void write_ht_irq_msg(unsigned int irq, struct ht_irq_msg *msg)
-{
- struct ht_irq_cfg *cfg = irq_get_handler_data(irq);
- unsigned long flags;
-
- spin_lock_irqsave(&ht_irq_lock, flags);
- if (cfg->msg.address_lo != msg->address_lo) {
- pci_write_config_byte(cfg->dev, cfg->pos + 2, cfg->idx);
- pci_write_config_dword(cfg->dev, cfg->pos + 4, msg->address_lo);
- }
- if (cfg->msg.address_hi != msg->address_hi) {
- pci_write_config_byte(cfg->dev, cfg->pos + 2, cfg->idx + 1);
- pci_write_config_dword(cfg->dev, cfg->pos + 4, msg->address_hi);
- }
- if (cfg->update)
- cfg->update(cfg->dev, irq, msg);
- spin_unlock_irqrestore(&ht_irq_lock, flags);
- cfg->msg = *msg;
-}
-
-void fetch_ht_irq_msg(unsigned int irq, struct ht_irq_msg *msg)
-{
- struct ht_irq_cfg *cfg = irq_get_handler_data(irq);
-
- *msg = cfg->msg;
-}
-
-void mask_ht_irq(struct irq_data *data)
-{
- struct ht_irq_cfg *cfg = irq_data_get_irq_handler_data(data);
- struct ht_irq_msg msg = cfg->msg;
-
- msg.address_lo |= 1;
- write_ht_irq_msg(data->irq, &msg);
-}
-
-void unmask_ht_irq(struct irq_data *data)
-{
- struct ht_irq_cfg *cfg = irq_data_get_irq_handler_data(data);
- struct ht_irq_msg msg = cfg->msg;
-
- msg.address_lo &= ~1;
- write_ht_irq_msg(data->irq, &msg);
-}
-
-/**
- * __ht_create_irq - create an irq and attach it to a device.
- * @dev: The hypertransport device to find the irq capability on.
- * @idx: Which of the possible irqs to attach to.
- * @update: Function to be called when changing the htirq message
- *
- * The irq number of the new irq or a negative error value is returned.
- */
-int __ht_create_irq(struct pci_dev *dev, int idx, ht_irq_update_t *update)
-{
- int max_irq, pos, irq;
- unsigned long flags;
- u32 data;
-
- pos = pci_find_ht_capability(dev, HT_CAPTYPE_IRQ);
- if (!pos)
- return -EINVAL;
-
- /* Verify the idx I want to use is in range */
- spin_lock_irqsave(&ht_irq_lock, flags);
- pci_write_config_byte(dev, pos + 2, 1);
- pci_read_config_dword(dev, pos + 4, &data);
- spin_unlock_irqrestore(&ht_irq_lock, flags);
-
- max_irq = (data >> 16) & 0xff;
- if (idx > max_irq)
- return -EINVAL;
-
- irq = arch_setup_ht_irq(idx, pos, dev, update);
- if (irq > 0)
- dev_dbg(&dev->dev, "irq %d for HT\n", irq);
-
- return irq;
-}
-EXPORT_SYMBOL(__ht_create_irq);
-
-/**
- * ht_create_irq - create an irq and attach it to a device.
- * @dev: The hypertransport device to find the irq capability on.
- * @idx: Which of the possible irqs to attach to.
- *
- * ht_create_irq needs to be called for all hypertransport devices
- * that generate irqs.
- *
- * The irq number of the new irq or a negative error value is returned.
- */
-int ht_create_irq(struct pci_dev *dev, int idx)
-{
- return __ht_create_irq(dev, idx, NULL);
-}
-EXPORT_SYMBOL(ht_create_irq);
-
-/**
- * ht_destroy_irq - destroy an irq created with ht_create_irq
- * @irq: irq to be destroyed
- *
- * This reverses ht_create_irq removing the specified irq from
- * existence. The irq should be free before this happens.
- */
-void ht_destroy_irq(unsigned int irq)
-{
- arch_teardown_ht_irq(irq);
-}
-EXPORT_SYMBOL(ht_destroy_irq);
diff --git a/include/linux/htirq.h b/include/linux/htirq.h
deleted file mode 100644
index 127c39d815ba..000000000000
--- a/include/linux/htirq.h
+++ /dev/null
@@ -1,39 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef LINUX_HTIRQ_H
-#define LINUX_HTIRQ_H
-
-struct pci_dev;
-struct irq_data;
-
-struct ht_irq_msg {
- u32 address_lo; /* low 32 bits of the ht irq message */
- u32 address_hi; /* high 32 bits of the it irq message */
-};
-
-typedef void (ht_irq_update_t)(struct pci_dev *dev, int irq,
- struct ht_irq_msg *msg);
-
-struct ht_irq_cfg {
- struct pci_dev *dev;
- /* Update callback used to cope with buggy hardware */
- ht_irq_update_t *update;
- unsigned pos;
- unsigned idx;
- struct ht_irq_msg msg;
-};
-
-/* Helper functions.. */
-void fetch_ht_irq_msg(unsigned int irq, struct ht_irq_msg *msg);
-void write_ht_irq_msg(unsigned int irq, struct ht_irq_msg *msg);
-void mask_ht_irq(struct irq_data *data);
-void unmask_ht_irq(struct irq_data *data);
-
-/* The arch hook for getting things started */
-int arch_setup_ht_irq(int idx, int pos, struct pci_dev *dev,
- ht_irq_update_t *update);
-void arch_teardown_ht_irq(unsigned int irq);
-
-/* For drivers of buggy hardware */
-int __ht_create_irq(struct pci_dev *dev, int idx, ht_irq_update_t *update);
-
-#endif /* LINUX_HTIRQ_H */
diff --git a/include/linux/pci.h b/include/linux/pci.h
index d16a7c037ec0..16287684dfe8 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1484,12 +1484,6 @@ static inline void pcie_set_ecrc_checking(struct pci_dev *dev) { }
static inline void pcie_ecrc_get_policy(char *str) { }
#endif
-#ifdef CONFIG_HT_IRQ
-/* The functions a driver should call */
-int ht_create_irq(struct pci_dev *dev, int idx);
-void ht_destroy_irq(unsigned int irq);
-#endif /* CONFIG_HT_IRQ */
-
#ifdef CONFIG_PCI_ATS
/* Address Translation Service */
void pci_ats_init(struct pci_dev *dev);
diff --git a/tools/testing/selftests/x86/5lvl.c b/tools/testing/selftests/x86/5lvl.c
new file mode 100644
index 000000000000..2eafdcd4c2b3
--- /dev/null
+++ b/tools/testing/selftests/x86/5lvl.c
@@ -0,0 +1,177 @@
+#include <stdio.h>
+#include <sys/mman.h>
+
+#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
+
+#define PAGE_SIZE 4096
+#define LOW_ADDR ((void *) (1UL << 30))
+#define HIGH_ADDR ((void *) (1UL << 50))
+
+struct testcase {
+ void *addr;
+ unsigned long size;
+ unsigned long flags;
+ const char *msg;
+ unsigned int low_addr_required:1;
+ unsigned int keep_mapped:1;
+};
+
+static struct testcase testcases[] = {
+ {
+ .addr = NULL,
+ .size = 2 * PAGE_SIZE,
+ .flags = MAP_PRIVATE | MAP_ANONYMOUS,
+ .msg = "mmap(NULL)",
+ .low_addr_required = 1,
+ },
+ {
+ .addr = LOW_ADDR,
+ .size = 2 * PAGE_SIZE,
+ .flags = MAP_PRIVATE | MAP_ANONYMOUS,
+ .msg = "mmap(LOW_ADDR)",
+ .low_addr_required = 1,
+ },
+ {
+ .addr = HIGH_ADDR,
+ .size = 2 * PAGE_SIZE,
+ .flags = MAP_PRIVATE | MAP_ANONYMOUS,
+ .msg = "mmap(HIGH_ADDR)",
+ .keep_mapped = 1,
+ },
+ {
+ .addr = HIGH_ADDR,
+ .size = 2 * PAGE_SIZE,
+ .flags = MAP_PRIVATE | MAP_ANONYMOUS,
+ .msg = "mmap(HIGH_ADDR) again",
+ .keep_mapped = 1,
+ },
+ {
+ .addr = HIGH_ADDR,
+ .size = 2 * PAGE_SIZE,
+ .flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED,
+ .msg = "mmap(HIGH_ADDR, MAP_FIXED)",
+ },
+ {
+ .addr = (void*) -1,
+ .size = 2 * PAGE_SIZE,
+ .flags = MAP_PRIVATE | MAP_ANONYMOUS,
+ .msg = "mmap(-1)",
+ .keep_mapped = 1,
+ },
+ {
+ .addr = (void*) -1,
+ .size = 2 * PAGE_SIZE,
+ .flags = MAP_PRIVATE | MAP_ANONYMOUS,
+ .msg = "mmap(-1) again",
+ },
+ {
+ .addr = (void *)((1UL << 47) - PAGE_SIZE),
+ .size = 2 * PAGE_SIZE,
+ .flags = MAP_PRIVATE | MAP_ANONYMOUS,
+ .msg = "mmap((1UL << 47), 2 * PAGE_SIZE)",
+ .low_addr_required = 1,
+ .keep_mapped = 1,
+ },
+ {
+ .addr = (void *)((1UL << 47) - PAGE_SIZE / 2),
+ .size = 2 * PAGE_SIZE,
+ .flags = MAP_PRIVATE | MAP_ANONYMOUS,
+ .msg = "mmap((1UL << 47), 2 * PAGE_SIZE / 2)",
+ .low_addr_required = 1,
+ .keep_mapped = 1,
+ },
+ {
+ .addr = (void *)((1UL << 47) - PAGE_SIZE),
+ .size = 2 * PAGE_SIZE,
+ .flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED,
+ .msg = "mmap((1UL << 47) - PAGE_SIZE, 2 * PAGE_SIZE, MAP_FIXED)",
+ },
+ {
+ .addr = NULL,
+ .size = 2UL << 20,
+ .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS,
+ .msg = "mmap(NULL, MAP_HUGETLB)",
+ .low_addr_required = 1,
+ },
+ {
+ .addr = LOW_ADDR,
+ .size = 2UL << 20,
+ .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS,
+ .msg = "mmap(LOW_ADDR, MAP_HUGETLB)",
+ .low_addr_required = 1,
+ },
+ {
+ .addr = HIGH_ADDR,
+ .size = 2UL << 20,
+ .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS,
+ .msg = "mmap(HIGH_ADDR, MAP_HUGETLB)",
+ .keep_mapped = 1,
+ },
+ {
+ .addr = HIGH_ADDR,
+ .size = 2UL << 20,
+ .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS,
+ .msg = "mmap(HIGH_ADDR, MAP_HUGETLB) again",
+ .keep_mapped = 1,
+ },
+ {
+ .addr = HIGH_ADDR,
+ .size = 2UL << 20,
+ .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED,
+ .msg = "mmap(HIGH_ADDR, MAP_FIXED | MAP_HUGETLB)",
+ },
+ {
+ .addr = (void*) -1,
+ .size = 2UL << 20,
+ .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS,
+ .msg = "mmap(-1, MAP_HUGETLB)",
+ .keep_mapped = 1,
+ },
+ {
+ .addr = (void*) -1,
+ .size = 2UL << 20,
+ .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS,
+ .msg = "mmap(-1, MAP_HUGETLB) again",
+ },
+ {
+ .addr = (void *)((1UL << 47) - PAGE_SIZE),
+ .size = 4UL << 20,
+ .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS,
+ .msg = "mmap((1UL << 47), 4UL << 20, MAP_HUGETLB)",
+ .low_addr_required = 1,
+ .keep_mapped = 1,
+ },
+ {
+ .addr = (void *)((1UL << 47) - (2UL << 20)),
+ .size = 4UL << 20,
+ .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED,
+ .msg = "mmap((1UL << 47) - (2UL << 20), 4UL << 20, MAP_FIXED | MAP_HUGETLB)",
+ },
+};
+
+int main(int argc, char **argv)
+{
+ int i;
+ void *p;
+
+ for (i = 0; i < ARRAY_SIZE(testcases); i++) {
+ struct testcase *t = testcases + i;
+
+ p = mmap(t->addr, t->size, PROT_NONE, t->flags, -1, 0);
+
+ printf("%s: %p - ", t->msg, p);
+
+ if (p == MAP_FAILED) {
+ printf("FAILED\n");
+ continue;
+ }
+
+ if (t->low_addr_required && p >= (void *)(1UL << 47))
+ printf("FAILED\n");
+ else
+ printf("OK\n");
+ if (!t->keep_mapped)
+ munmap(p, t->size);
+ }
+ return 0;
+}
diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile
index 7b1adeee4b0f..939a337128db 100644
--- a/tools/testing/selftests/x86/Makefile
+++ b/tools/testing/selftests/x86/Makefile
@@ -11,7 +11,7 @@ TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt ptrace_sysc
TARGETS_C_32BIT_ONLY := entry_from_vm86 syscall_arg_fault test_syscall_vdso unwind_vdso \
test_FCMOV test_FCOMI test_FISTTP \
vdso_restorer
-TARGETS_C_64BIT_ONLY := fsgsbase sysret_rip
+TARGETS_C_64BIT_ONLY := fsgsbase sysret_rip 5lvl
TARGETS_C_32BIT_ALL := $(TARGETS_C_BOTHBITS) $(TARGETS_C_32BIT_ONLY)
TARGETS_C_64BIT_ALL := $(TARGETS_C_BOTHBITS) $(TARGETS_C_64BIT_ONLY)
diff --git a/tools/testing/selftests/x86/mpx-hw.h b/tools/testing/selftests/x86/mpx-hw.h
index 3f0093911f03..d1b61ab870f8 100644
--- a/tools/testing/selftests/x86/mpx-hw.h
+++ b/tools/testing/selftests/x86/mpx-hw.h
@@ -52,14 +52,14 @@
struct mpx_bd_entry {
union {
char x[MPX_BOUNDS_DIR_ENTRY_SIZE_BYTES];
- void *contents[1];
+ void *contents[0];
};
} __attribute__((packed));
struct mpx_bt_entry {
union {
char x[MPX_BOUNDS_TABLE_ENTRY_SIZE_BYTES];
- unsigned long contents[1];
+ unsigned long contents[0];
};
} __attribute__((packed));
diff --git a/tools/testing/selftests/x86/pkey-helpers.h b/tools/testing/selftests/x86/pkey-helpers.h
index 3818f25391c2..b3cb7670e026 100644
--- a/tools/testing/selftests/x86/pkey-helpers.h
+++ b/tools/testing/selftests/x86/pkey-helpers.h
@@ -30,6 +30,7 @@ static inline void sigsafe_printf(const char *format, ...)
if (!dprint_in_signal) {
vprintf(format, ap);
} else {
+ int ret;
int len = vsnprintf(dprint_in_signal_buffer,
DPRINT_IN_SIGNAL_BUF_SIZE,
format, ap);
@@ -39,7 +40,9 @@ static inline void sigsafe_printf(const char *format, ...)
*/
if (len > DPRINT_IN_SIGNAL_BUF_SIZE)
len = DPRINT_IN_SIGNAL_BUF_SIZE;
- write(1, dprint_in_signal_buffer, len);
+ ret = write(1, dprint_in_signal_buffer, len);
+ if (ret < 0)
+ abort();
}
va_end(ap);
}
diff --git a/tools/testing/selftests/x86/protection_keys.c b/tools/testing/selftests/x86/protection_keys.c
index 7a1cc0e56d2d..bc1b0735bb50 100644
--- a/tools/testing/selftests/x86/protection_keys.c
+++ b/tools/testing/selftests/x86/protection_keys.c
@@ -250,7 +250,7 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext)
unsigned long ip;
char *fpregs;
u32 *pkru_ptr;
- u64 si_pkey;
+ u64 siginfo_pkey;
u32 *si_pkey_ptr;
int pkru_offset;
fpregset_t fpregset;
@@ -292,9 +292,9 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext)
si_pkey_ptr = (u32 *)(((u8 *)si) + si_pkey_offset);
dprintf1("si_pkey_ptr: %p\n", si_pkey_ptr);
dump_mem(si_pkey_ptr - 8, 24);
- si_pkey = *si_pkey_ptr;
- pkey_assert(si_pkey < NR_PKEYS);
- last_si_pkey = si_pkey;
+ siginfo_pkey = *si_pkey_ptr;
+ pkey_assert(siginfo_pkey < NR_PKEYS);
+ last_si_pkey = siginfo_pkey;
if ((si->si_code == SEGV_MAPERR) ||
(si->si_code == SEGV_ACCERR) ||
@@ -306,7 +306,7 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext)
dprintf1("signal pkru from xsave: %08x\n", *pkru_ptr);
/* need __rdpkru() version so we do not do shadow_pkru checking */
dprintf1("signal pkru from pkru: %08x\n", __rdpkru());
- dprintf1("si_pkey from siginfo: %jx\n", si_pkey);
+ dprintf1("pkey from siginfo: %jx\n", siginfo_pkey);
*(u64 *)pkru_ptr = 0x00000000;
dprintf1("WARNING: set PRKU=0 to allow faulting instruction to continue\n");
pkru_faults++;
^ permalink raw reply related [flat|nested] 547+ messages in thread
* [GIT PULL] x86 fixes
@ 2017-11-05 14:46 Ingo Molnar
0 siblings, 0 replies; 547+ messages in thread
From: Ingo Molnar @ 2017-11-05 14:46 UTC (permalink / raw)
To: Linus Torvalds
Cc: linux-kernel, Thomas Gleixner, H. Peter Anvin, Borislav Petkov,
Peter Zijlstra, Andrew Morton, Josh Poimboeuf
Linus,
Please pull the latest x86-urgent-for-linus git tree from:
git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus
# HEAD: eda9cec4c9a12208a6f69fbe68f72a6311d50032 x86/module: Detect and skip invalid relocations
Two fixes:
- A PCID related revert that fixes power management and performance regressions.
- The module loader robustization and sanity check commit is rather fresh, but it
looked like a good idea to apply because of the hidden data corruption problem
such invalid modules could cause.
out-of-topic modifications in x86-urgent-for-linus:
-----------------------------------------------------
arch/ia64/include/asm/acpi.h # 675357362aeb: Revert "x86/mm: Stop calling
drivers/acpi/processor_idle.c # 675357362aeb: Revert "x86/mm: Stop calling
drivers/idle/intel_idle.c # 675357362aeb: Revert "x86/mm: Stop calling
Thanks,
Ingo
------------------>
Andy Lutomirski (1):
Revert "x86/mm: Stop calling leave_mm() in idle code"
Josh Poimboeuf (1):
x86/module: Detect and skip invalid relocations
arch/ia64/include/asm/acpi.h | 2 ++
arch/x86/include/asm/acpi.h | 2 ++
arch/x86/kernel/module.c | 13 +++++++++++++
arch/x86/mm/tlb.c | 17 ++++++++++++++---
drivers/acpi/processor_idle.c | 2 ++
drivers/idle/intel_idle.c | 9 +++++----
6 files changed, 38 insertions(+), 7 deletions(-)
diff --git a/arch/ia64/include/asm/acpi.h b/arch/ia64/include/asm/acpi.h
index c86a947f5368..a3d0211970e9 100644
--- a/arch/ia64/include/asm/acpi.h
+++ b/arch/ia64/include/asm/acpi.h
@@ -112,6 +112,8 @@ static inline void arch_acpi_set_pdc_bits(u32 *buf)
buf[2] |= ACPI_PDC_EST_CAPABILITY_SMP;
}
+#define acpi_unlazy_tlb(x)
+
#ifdef CONFIG_ACPI_NUMA
extern cpumask_t early_cpu_possible_map;
#define for_each_possible_early_cpu(cpu) \
diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h
index 72d867f6b518..8d0ec9df1cbe 100644
--- a/arch/x86/include/asm/acpi.h
+++ b/arch/x86/include/asm/acpi.h
@@ -150,6 +150,8 @@ static inline void disable_acpi(void) { }
extern int x86_acpi_numa_init(void);
#endif /* CONFIG_ACPI_NUMA */
+#define acpi_unlazy_tlb(x) leave_mm(x)
+
#ifdef CONFIG_ACPI_APEI
static inline pgprot_t arch_apei_get_mem_attribute(phys_addr_t addr)
{
diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c
index 62e7d70aadd5..da0c160e5589 100644
--- a/arch/x86/kernel/module.c
+++ b/arch/x86/kernel/module.c
@@ -172,19 +172,27 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
case R_X86_64_NONE:
break;
case R_X86_64_64:
+ if (*(u64 *)loc != 0)
+ goto invalid_relocation;
*(u64 *)loc = val;
break;
case R_X86_64_32:
+ if (*(u32 *)loc != 0)
+ goto invalid_relocation;
*(u32 *)loc = val;
if (val != *(u32 *)loc)
goto overflow;
break;
case R_X86_64_32S:
+ if (*(s32 *)loc != 0)
+ goto invalid_relocation;
*(s32 *)loc = val;
if ((s64)val != *(s32 *)loc)
goto overflow;
break;
case R_X86_64_PC32:
+ if (*(u32 *)loc != 0)
+ goto invalid_relocation;
val -= (u64)loc;
*(u32 *)loc = val;
#if 0
@@ -200,6 +208,11 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
}
return 0;
+invalid_relocation:
+ pr_err("x86/modules: Skipping invalid relocation target, existing value is nonzero for type %d, loc %p, val %Lx\n",
+ (int)ELF64_R_TYPE(rel[i].r_info), loc, val);
+ return -ENOEXEC;
+
overflow:
pr_err("overflow in relocation type %d val %Lx\n",
(int)ELF64_R_TYPE(rel[i].r_info), val);
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 0f3d0cea4d00..3118392cdf75 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -85,6 +85,7 @@ void leave_mm(int cpu)
switch_mm(NULL, &init_mm, NULL);
}
+EXPORT_SYMBOL_GPL(leave_mm);
void switch_mm(struct mm_struct *prev, struct mm_struct *next,
struct task_struct *tsk)
@@ -195,12 +196,22 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
this_cpu_write(cpu_tlbstate.ctxs[new_asid].ctx_id, next->context.ctx_id);
this_cpu_write(cpu_tlbstate.ctxs[new_asid].tlb_gen, next_tlb_gen);
write_cr3(build_cr3(next, new_asid));
- trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH,
- TLB_FLUSH_ALL);
+
+ /*
+ * NB: This gets called via leave_mm() in the idle path
+ * where RCU functions differently. Tracing normally
+ * uses RCU, so we need to use the _rcuidle variant.
+ *
+ * (There is no good reason for this. The idle code should
+ * be rearranged to call this before rcu_idle_enter().)
+ */
+ trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
} else {
/* The new ASID is already up to date. */
write_cr3(build_cr3_noflush(next, new_asid));
- trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, 0);
+
+ /* See above wrt _rcuidle. */
+ trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, 0);
}
this_cpu_write(cpu_tlbstate.loaded_mm, next);
diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c
index 2736e25e9dc6..d50a7b6ccddd 100644
--- a/drivers/acpi/processor_idle.c
+++ b/drivers/acpi/processor_idle.c
@@ -710,6 +710,8 @@ static DEFINE_RAW_SPINLOCK(c3_lock);
static void acpi_idle_enter_bm(struct acpi_processor *pr,
struct acpi_processor_cx *cx, bool timer_bc)
{
+ acpi_unlazy_tlb(smp_processor_id());
+
/*
* Must be done before busmaster disable as we might need to
* access HPET !
diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c
index 5dc7ea4b6bc4..f0b06b14e782 100644
--- a/drivers/idle/intel_idle.c
+++ b/drivers/idle/intel_idle.c
@@ -913,15 +913,16 @@ static __cpuidle int intel_idle(struct cpuidle_device *dev,
struct cpuidle_state *state = &drv->states[index];
unsigned long eax = flg2MWAIT(state->flags);
unsigned int cstate;
+ int cpu = smp_processor_id();
cstate = (((eax) >> MWAIT_SUBSTATE_SIZE) & MWAIT_CSTATE_MASK) + 1;
/*
- * NB: if CPUIDLE_FLAG_TLB_FLUSHED is set, this idle transition
- * will probably flush the TLB. It's not guaranteed to flush
- * the TLB, though, so it's not clear that we can do anything
- * useful with this knowledge.
+ * leave_mm() to avoid costly and often unnecessary wakeups
+ * for flushing the user TLB's associated with the active mm.
*/
+ if (state->flags & CPUIDLE_FLAG_TLB_FLUSHED)
+ leave_mm(cpu);
if (!(lapic_timer_reliable_states & (1 << (cstate))))
tick_broadcast_enter();
^ permalink raw reply related [flat|nested] 547+ messages in thread
* [GIT PULL] x86 fixes
@ 2017-10-27 19:24 Ingo Molnar
0 siblings, 0 replies; 547+ messages in thread
From: Ingo Molnar @ 2017-10-27 19:24 UTC (permalink / raw)
To: Linus Torvalds
Cc: linux-kernel, Thomas Gleixner, H. Peter Anvin, Peter Zijlstra,
Andrew Morton
Linus,
Please pull the latest x86-urgent-for-linus git tree from:
git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus
# HEAD: 90edaac62729d3b9cbb97756261a0049a7fdd6a0 Revert "x86/mm: Limit mmap() of /dev/mem to valid physical addresses"
Misc fixes:
- revert a /dev/mem restriction change that crashes with certain boot parameters
- an AMD erratum fix for cases where the BIOS doesn't apply it
- fix unwinder debuginfo
- improve ORC unwinder warning printouts
Thanks,
Ingo
------------------>
Borislav Petkov (1):
x86/cpu/AMD: Apply the Erratum 688 fix when the BIOS doesn't
Ingo Molnar (1):
Revert "x86/mm: Limit mmap() of /dev/mem to valid physical addresses"
Josh Poimboeuf (2):
x86/entry: Fix idtentry unwind hint
x86/unwind: Show function name+offset in ORC error messages
arch/x86/entry/entry_64.S | 2 +-
arch/x86/include/asm/io.h | 4 ----
arch/x86/kernel/amd_nb.c | 41 +++++++++++++++++++++++++++++++++++++++++
arch/x86/kernel/unwind_orc.c | 29 +++++++++++++++--------------
arch/x86/mm/mmap.c | 12 ------------
5 files changed, 57 insertions(+), 31 deletions(-)
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 49167258d587..f6cdb7a1455e 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -808,7 +808,7 @@ apicinterrupt IRQ_WORK_VECTOR irq_work_interrupt smp_irq_work_interrupt
.macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1
ENTRY(\sym)
- UNWIND_HINT_IRET_REGS offset=8
+ UNWIND_HINT_IRET_REGS offset=\has_error_code*8
/* Sanity check */
.if \shift_ist != -1 && \paranoid == 0
diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h
index 322d25ae23ab..c40a95c33bb8 100644
--- a/arch/x86/include/asm/io.h
+++ b/arch/x86/include/asm/io.h
@@ -110,10 +110,6 @@ build_mmio_write(__writeq, "q", unsigned long, "r", )
#endif
-#define ARCH_HAS_VALID_PHYS_ADDR_RANGE
-extern int valid_phys_addr_range(phys_addr_t addr, size_t size);
-extern int valid_mmap_phys_addr_range(unsigned long pfn, size_t size);
-
/**
* virt_to_phys - map virtual addresses to physical
* @address: address to remap
diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c
index 458da8509b75..6db28f17ff28 100644
--- a/arch/x86/kernel/amd_nb.c
+++ b/arch/x86/kernel/amd_nb.c
@@ -27,6 +27,8 @@ static const struct pci_device_id amd_root_ids[] = {
{}
};
+#define PCI_DEVICE_ID_AMD_CNB17H_F4 0x1704
+
const struct pci_device_id amd_nb_misc_ids[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB_MISC) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_MISC) },
@@ -37,6 +39,7 @@ const struct pci_device_id amd_nb_misc_ids[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_NB_F3) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_M30H_NB_F3) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_DF_F3) },
+ { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CNB17H_F3) },
{}
};
EXPORT_SYMBOL_GPL(amd_nb_misc_ids);
@@ -48,6 +51,7 @@ static const struct pci_device_id amd_nb_link_ids[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_NB_F4) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_M30H_NB_F4) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_DF_F4) },
+ { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CNB17H_F4) },
{}
};
@@ -402,11 +406,48 @@ void amd_flush_garts(void)
}
EXPORT_SYMBOL_GPL(amd_flush_garts);
+static void __fix_erratum_688(void *info)
+{
+#define MSR_AMD64_IC_CFG 0xC0011021
+
+ msr_set_bit(MSR_AMD64_IC_CFG, 3);
+ msr_set_bit(MSR_AMD64_IC_CFG, 14);
+}
+
+/* Apply erratum 688 fix so machines without a BIOS fix work. */
+static __init void fix_erratum_688(void)
+{
+ struct pci_dev *F4;
+ u32 val;
+
+ if (boot_cpu_data.x86 != 0x14)
+ return;
+
+ if (!amd_northbridges.num)
+ return;
+
+ F4 = node_to_amd_nb(0)->link;
+ if (!F4)
+ return;
+
+ if (pci_read_config_dword(F4, 0x164, &val))
+ return;
+
+ if (val & BIT(2))
+ return;
+
+ on_each_cpu(__fix_erratum_688, NULL, 0);
+
+ pr_info("x86/cpu/AMD: CPU erratum 688 worked around\n");
+}
+
static __init int init_amd_nbs(void)
{
amd_cache_northbridges();
amd_cache_gart();
+ fix_erratum_688();
+
return 0;
}
diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c
index 570b70d3f604..b95007e7c1b3 100644
--- a/arch/x86/kernel/unwind_orc.c
+++ b/arch/x86/kernel/unwind_orc.c
@@ -86,8 +86,8 @@ static struct orc_entry *orc_find(unsigned long ip)
idx = (ip - LOOKUP_START_IP) / LOOKUP_BLOCK_SIZE;
if (unlikely((idx >= lookup_num_blocks-1))) {
- orc_warn("WARNING: bad lookup idx: idx=%u num=%u ip=%lx\n",
- idx, lookup_num_blocks, ip);
+ orc_warn("WARNING: bad lookup idx: idx=%u num=%u ip=%pB\n",
+ idx, lookup_num_blocks, (void *)ip);
return NULL;
}
@@ -96,8 +96,8 @@ static struct orc_entry *orc_find(unsigned long ip)
if (unlikely((__start_orc_unwind + start >= __stop_orc_unwind) ||
(__start_orc_unwind + stop > __stop_orc_unwind))) {
- orc_warn("WARNING: bad lookup value: idx=%u num=%u start=%u stop=%u ip=%lx\n",
- idx, lookup_num_blocks, start, stop, ip);
+ orc_warn("WARNING: bad lookup value: idx=%u num=%u start=%u stop=%u ip=%pB\n",
+ idx, lookup_num_blocks, start, stop, (void *)ip);
return NULL;
}
@@ -373,7 +373,7 @@ bool unwind_next_frame(struct unwind_state *state)
case ORC_REG_R10:
if (!state->regs || !state->full_regs) {
- orc_warn("missing regs for base reg R10 at ip %p\n",
+ orc_warn("missing regs for base reg R10 at ip %pB\n",
(void *)state->ip);
goto done;
}
@@ -382,7 +382,7 @@ bool unwind_next_frame(struct unwind_state *state)
case ORC_REG_R13:
if (!state->regs || !state->full_regs) {
- orc_warn("missing regs for base reg R13 at ip %p\n",
+ orc_warn("missing regs for base reg R13 at ip %pB\n",
(void *)state->ip);
goto done;
}
@@ -391,7 +391,7 @@ bool unwind_next_frame(struct unwind_state *state)
case ORC_REG_DI:
if (!state->regs || !state->full_regs) {
- orc_warn("missing regs for base reg DI at ip %p\n",
+ orc_warn("missing regs for base reg DI at ip %pB\n",
(void *)state->ip);
goto done;
}
@@ -400,7 +400,7 @@ bool unwind_next_frame(struct unwind_state *state)
case ORC_REG_DX:
if (!state->regs || !state->full_regs) {
- orc_warn("missing regs for base reg DX at ip %p\n",
+ orc_warn("missing regs for base reg DX at ip %pB\n",
(void *)state->ip);
goto done;
}
@@ -408,7 +408,7 @@ bool unwind_next_frame(struct unwind_state *state)
break;
default:
- orc_warn("unknown SP base reg %d for ip %p\n",
+ orc_warn("unknown SP base reg %d for ip %pB\n",
orc->sp_reg, (void *)state->ip);
goto done;
}
@@ -436,7 +436,7 @@ bool unwind_next_frame(struct unwind_state *state)
case ORC_TYPE_REGS:
if (!deref_stack_regs(state, sp, &state->ip, &state->sp, true)) {
- orc_warn("can't dereference registers at %p for ip %p\n",
+ orc_warn("can't dereference registers at %p for ip %pB\n",
(void *)sp, (void *)orig_ip);
goto done;
}
@@ -448,7 +448,7 @@ bool unwind_next_frame(struct unwind_state *state)
case ORC_TYPE_REGS_IRET:
if (!deref_stack_regs(state, sp, &state->ip, &state->sp, false)) {
- orc_warn("can't dereference iret registers at %p for ip %p\n",
+ orc_warn("can't dereference iret registers at %p for ip %pB\n",
(void *)sp, (void *)orig_ip);
goto done;
}
@@ -465,7 +465,8 @@ bool unwind_next_frame(struct unwind_state *state)
break;
default:
- orc_warn("unknown .orc_unwind entry type %d\n", orc->type);
+ orc_warn("unknown .orc_unwind entry type %d for ip %pB\n",
+ orc->type, (void *)orig_ip);
break;
}
@@ -487,7 +488,7 @@ bool unwind_next_frame(struct unwind_state *state)
break;
default:
- orc_warn("unknown BP base reg %d for ip %p\n",
+ orc_warn("unknown BP base reg %d for ip %pB\n",
orc->bp_reg, (void *)orig_ip);
goto done;
}
@@ -496,7 +497,7 @@ bool unwind_next_frame(struct unwind_state *state)
if (state->stack_info.type == prev_type &&
on_stack(&state->stack_info, (void *)state->sp, sizeof(long)) &&
state->sp <= prev_sp) {
- orc_warn("stack going in the wrong direction? ip=%p\n",
+ orc_warn("stack going in the wrong direction? ip=%pB\n",
(void *)orig_ip);
goto done;
}
diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c
index 320c6237e1d1..a99679826846 100644
--- a/arch/x86/mm/mmap.c
+++ b/arch/x86/mm/mmap.c
@@ -174,15 +174,3 @@ const char *arch_vma_name(struct vm_area_struct *vma)
return "[mpx]";
return NULL;
}
-
-int valid_phys_addr_range(phys_addr_t addr, size_t count)
-{
- return addr + count <= __pa(high_memory);
-}
-
-int valid_mmap_phys_addr_range(unsigned long pfn, size_t count)
-{
- phys_addr_t addr = (phys_addr_t)pfn << PAGE_SHIFT;
-
- return valid_phys_addr_range(addr, count);
-}
^ permalink raw reply related [flat|nested] 547+ messages in thread
* [GIT PULL] x86 fixes
@ 2017-10-14 16:16 Ingo Molnar
0 siblings, 0 replies; 547+ messages in thread
From: Ingo Molnar @ 2017-10-14 16:16 UTC (permalink / raw)
To: Linus Torvalds
Cc: linux-kernel, Thomas Gleixner, H. Peter Anvin, Peter Zijlstra,
Andrew Morton
Linus,
Please pull the latest x86-urgent-for-linus git tree from:
git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus
# HEAD: 1f161f67a272cc4f29f27934dd3f74cb657eb5c4 x86/microcode: Do the family check first
A landry list of fixes:
- fix reboot breakage on some PCID-enabled system
- fix crashes/hangs on some PCID-enabled systems
- fix microcode loading on certain older CPUs
- various unwinder fixes
- extend an APIC quirk to more hardware systems and disable APIC related warning
on virtualized systems
- various Hyper-V fixes
- a macro definition robustness fix
- remove jprobes IRQ disabling
- various mem-encryption fixes
Thanks,
Ingo
------------------>
Andy Lutomirski (2):
x86/mm/64: Fix reboot interaction with CR4.PCIDE
x86/mm: Flush more aggressively in lazy TLB mode
Borislav Petkov (1):
x86/microcode: Do the family check first
Josh Poimboeuf (5):
kprobes/x86: Set up frame pointer in kprobe trampoline
x86/unwind: Fix dereference of untrusted pointer
x86/unwind: Use MSB for frame pointer encoding on 32-bit
x86/unwind: Align stack pointer in unwinder dump
x86/unwind: Disable unwinder warnings on 32-bit
Len Brown (1):
x86/apic: Update TSC_DEADLINE quirk with additional SKX stepping
Marcelo Henrique Cerri (1):
x86/hyperv: Fix hypercalls with extended CPU ranges for TLB flushing
Masami Hiramatsu (1):
kprobes/x86: Remove IRQ disabling from jprobe handlers
Mathias Krause (1):
x86/alternatives: Fix alt_max_short macro to really be a max()
Paolo Bonzini (1):
x86/apic: Silence "FW_BUG TSC_DEADLINE disabled due to Errata" on hypervisors
Tom Lendacky (1):
x86/mm: Disable various instrumentations of mm/mem_encrypt.c and mm/tlb.c
Vitaly Kuznetsov (2):
x86/hyperv: Clear vCPU banks between calls to avoid flushing unneeded vCPUs
x86/hyperv: Don't use percpu areas for pcpu_flush/pcpu_flush_ex structures
arch/x86/entry/entry_32.S | 4 +-
arch/x86/hyperv/hv_init.c | 5 ++
arch/x86/hyperv/mmu.c | 57 +++++++++---
arch/x86/include/asm/alternative-asm.h | 4 +-
arch/x86/include/asm/alternative.h | 6 +-
arch/x86/include/asm/mmu_context.h | 8 +-
arch/x86/include/asm/mshyperv.h | 1 +
arch/x86/include/asm/tlbflush.h | 24 ++++++
arch/x86/kernel/apic/apic.c | 15 +++-
arch/x86/kernel/cpu/microcode/core.c | 27 ++++--
arch/x86/kernel/kprobes/common.h | 13 ++-
arch/x86/kernel/kprobes/core.c | 2 -
arch/x86/kernel/reboot.c | 4 +
arch/x86/kernel/unwind_frame.c | 38 +++++++-
arch/x86/mm/Makefile | 11 ++-
arch/x86/mm/tlb.c | 153 ++++++++++++++++++++++++---------
16 files changed, 284 insertions(+), 88 deletions(-)
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index 8a13d468635a..50e0d2bc4528 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -176,7 +176,7 @@
/*
* This is a sneaky trick to help the unwinder find pt_regs on the stack. The
* frame pointer is replaced with an encoded pointer to pt_regs. The encoding
- * is just setting the LSB, which makes it an invalid stack address and is also
+ * is just clearing the MSB, which makes it an invalid stack address and is also
* a signal to the unwinder that it's a pt_regs pointer in disguise.
*
* NOTE: This macro must be used *after* SAVE_ALL because it corrupts the
@@ -185,7 +185,7 @@
.macro ENCODE_FRAME_POINTER
#ifdef CONFIG_FRAME_POINTER
mov %esp, %ebp
- orl $0x1, %ebp
+ andl $0x7fffffff, %ebp
#endif
.endm
diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
index 1a8eb550c40f..a5db63f728a2 100644
--- a/arch/x86/hyperv/hv_init.c
+++ b/arch/x86/hyperv/hv_init.c
@@ -85,6 +85,8 @@ EXPORT_SYMBOL_GPL(hyperv_cs);
u32 *hv_vp_index;
EXPORT_SYMBOL_GPL(hv_vp_index);
+u32 hv_max_vp_index;
+
static int hv_cpu_init(unsigned int cpu)
{
u64 msr_vp_index;
@@ -93,6 +95,9 @@ static int hv_cpu_init(unsigned int cpu)
hv_vp_index[smp_processor_id()] = msr_vp_index;
+ if (msr_vp_index > hv_max_vp_index)
+ hv_max_vp_index = msr_vp_index;
+
return 0;
}
diff --git a/arch/x86/hyperv/mmu.c b/arch/x86/hyperv/mmu.c
index 39e7f6e50919..9cc9e1c1e2db 100644
--- a/arch/x86/hyperv/mmu.c
+++ b/arch/x86/hyperv/mmu.c
@@ -36,9 +36,9 @@ struct hv_flush_pcpu_ex {
/* Each gva in gva_list encodes up to 4096 pages to flush */
#define HV_TLB_FLUSH_UNIT (4096 * PAGE_SIZE)
-static struct hv_flush_pcpu __percpu *pcpu_flush;
+static struct hv_flush_pcpu __percpu **pcpu_flush;
-static struct hv_flush_pcpu_ex __percpu *pcpu_flush_ex;
+static struct hv_flush_pcpu_ex __percpu **pcpu_flush_ex;
/*
* Fills in gva_list starting from offset. Returns the number of items added.
@@ -76,6 +76,18 @@ static inline int cpumask_to_vp_set(struct hv_flush_pcpu_ex *flush,
{
int cpu, vcpu, vcpu_bank, vcpu_offset, nr_bank = 1;
+ /* valid_bank_mask can represent up to 64 banks */
+ if (hv_max_vp_index / 64 >= 64)
+ return 0;
+
+ /*
+ * Clear all banks up to the maximum possible bank as hv_flush_pcpu_ex
+ * structs are not cleared between calls, we risk flushing unneeded
+ * vCPUs otherwise.
+ */
+ for (vcpu_bank = 0; vcpu_bank <= hv_max_vp_index / 64; vcpu_bank++)
+ flush->hv_vp_set.bank_contents[vcpu_bank] = 0;
+
/*
* Some banks may end up being empty but this is acceptable.
*/
@@ -83,11 +95,6 @@ static inline int cpumask_to_vp_set(struct hv_flush_pcpu_ex *flush,
vcpu = hv_cpu_number_to_vp_number(cpu);
vcpu_bank = vcpu / 64;
vcpu_offset = vcpu % 64;
-
- /* valid_bank_mask can represent up to 64 banks */
- if (vcpu_bank >= 64)
- return 0;
-
__set_bit(vcpu_offset, (unsigned long *)
&flush->hv_vp_set.bank_contents[vcpu_bank]);
if (vcpu_bank >= nr_bank)
@@ -102,6 +109,7 @@ static void hyperv_flush_tlb_others(const struct cpumask *cpus,
const struct flush_tlb_info *info)
{
int cpu, vcpu, gva_n, max_gvas;
+ struct hv_flush_pcpu **flush_pcpu;
struct hv_flush_pcpu *flush;
u64 status = U64_MAX;
unsigned long flags;
@@ -116,7 +124,17 @@ static void hyperv_flush_tlb_others(const struct cpumask *cpus,
local_irq_save(flags);
- flush = this_cpu_ptr(pcpu_flush);
+ flush_pcpu = this_cpu_ptr(pcpu_flush);
+
+ if (unlikely(!*flush_pcpu))
+ *flush_pcpu = page_address(alloc_page(GFP_ATOMIC));
+
+ flush = *flush_pcpu;
+
+ if (unlikely(!flush)) {
+ local_irq_restore(flags);
+ goto do_native;
+ }
if (info->mm) {
flush->address_space = virt_to_phys(info->mm->pgd);
@@ -173,6 +191,7 @@ static void hyperv_flush_tlb_others_ex(const struct cpumask *cpus,
const struct flush_tlb_info *info)
{
int nr_bank = 0, max_gvas, gva_n;
+ struct hv_flush_pcpu_ex **flush_pcpu;
struct hv_flush_pcpu_ex *flush;
u64 status = U64_MAX;
unsigned long flags;
@@ -187,7 +206,17 @@ static void hyperv_flush_tlb_others_ex(const struct cpumask *cpus,
local_irq_save(flags);
- flush = this_cpu_ptr(pcpu_flush_ex);
+ flush_pcpu = this_cpu_ptr(pcpu_flush_ex);
+
+ if (unlikely(!*flush_pcpu))
+ *flush_pcpu = page_address(alloc_page(GFP_ATOMIC));
+
+ flush = *flush_pcpu;
+
+ if (unlikely(!flush)) {
+ local_irq_restore(flags);
+ goto do_native;
+ }
if (info->mm) {
flush->address_space = virt_to_phys(info->mm->pgd);
@@ -222,18 +251,18 @@ static void hyperv_flush_tlb_others_ex(const struct cpumask *cpus,
flush->flags |= HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY;
status = hv_do_rep_hypercall(
HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX,
- 0, nr_bank + 2, flush, NULL);
+ 0, nr_bank, flush, NULL);
} else if (info->end &&
((info->end - info->start)/HV_TLB_FLUSH_UNIT) > max_gvas) {
status = hv_do_rep_hypercall(
HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX,
- 0, nr_bank + 2, flush, NULL);
+ 0, nr_bank, flush, NULL);
} else {
gva_n = fill_gva_list(flush->gva_list, nr_bank,
info->start, info->end);
status = hv_do_rep_hypercall(
HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX,
- gva_n, nr_bank + 2, flush, NULL);
+ gva_n, nr_bank, flush, NULL);
}
local_irq_restore(flags);
@@ -266,7 +295,7 @@ void hyper_alloc_mmu(void)
return;
if (!(ms_hyperv.hints & HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED))
- pcpu_flush = __alloc_percpu(PAGE_SIZE, PAGE_SIZE);
+ pcpu_flush = alloc_percpu(struct hv_flush_pcpu *);
else
- pcpu_flush_ex = __alloc_percpu(PAGE_SIZE, PAGE_SIZE);
+ pcpu_flush_ex = alloc_percpu(struct hv_flush_pcpu_ex *);
}
diff --git a/arch/x86/include/asm/alternative-asm.h b/arch/x86/include/asm/alternative-asm.h
index e7636bac7372..6c98821fef5e 100644
--- a/arch/x86/include/asm/alternative-asm.h
+++ b/arch/x86/include/asm/alternative-asm.h
@@ -62,8 +62,10 @@
#define new_len2 145f-144f
/*
- * max without conditionals. Idea adapted from:
+ * gas compatible max based on the idea from:
* http://graphics.stanford.edu/~seander/bithacks.html#IntegerMinOrMax
+ *
+ * The additional "-" is needed because gas uses a "true" value of -1.
*/
#define alt_max_short(a, b) ((a) ^ (((a) ^ (b)) & -(-((a) < (b)))))
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index c096624137ae..ccbe24e697c4 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -103,12 +103,12 @@ static inline int alternatives_text_reserved(void *start, void *end)
alt_end_marker ":\n"
/*
- * max without conditionals. Idea adapted from:
+ * gas compatible max based on the idea from:
* http://graphics.stanford.edu/~seander/bithacks.html#IntegerMinOrMax
*
- * The additional "-" is needed because gas works with s32s.
+ * The additional "-" is needed because gas uses a "true" value of -1.
*/
-#define alt_max_short(a, b) "((" a ") ^ (((" a ") ^ (" b ")) & -(-((" a ") - (" b ")))))"
+#define alt_max_short(a, b) "((" a ") ^ (((" a ") ^ (" b ")) & -(-((" a ") < (" b ")))))"
/*
* Pad the second replacement alternative with additional NOPs if it is
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index c120b5db178a..3c856a15b98e 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -126,13 +126,7 @@ static inline void switch_ldt(struct mm_struct *prev, struct mm_struct *next)
DEBUG_LOCKS_WARN_ON(preemptible());
}
-static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
-{
- int cpu = smp_processor_id();
-
- if (cpumask_test_cpu(cpu, mm_cpumask(mm)))
- cpumask_clear_cpu(cpu, mm_cpumask(mm));
-}
+void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk);
static inline int init_new_context(struct task_struct *tsk,
struct mm_struct *mm)
diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
index 738503e1f80c..530f448fddaf 100644
--- a/arch/x86/include/asm/mshyperv.h
+++ b/arch/x86/include/asm/mshyperv.h
@@ -289,6 +289,7 @@ static inline u64 hv_do_rep_hypercall(u16 code, u16 rep_count, u16 varhead_size,
* to this information.
*/
extern u32 *hv_vp_index;
+extern u32 hv_max_vp_index;
/**
* hv_cpu_number_to_vp_number() - Map CPU to VP.
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index 4893abf7f74f..d362161d3291 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -83,6 +83,13 @@ static inline u64 inc_mm_tlb_gen(struct mm_struct *mm)
#endif
/*
+ * If tlb_use_lazy_mode is true, then we try to avoid switching CR3 to point
+ * to init_mm when we switch to a kernel thread (e.g. the idle thread). If
+ * it's false, then we immediately switch CR3 when entering a kernel thread.
+ */
+DECLARE_STATIC_KEY_TRUE(tlb_use_lazy_mode);
+
+/*
* 6 because 6 should be plenty and struct tlb_state will fit in
* two cache lines.
*/
@@ -105,6 +112,23 @@ struct tlb_state {
u16 next_asid;
/*
+ * We can be in one of several states:
+ *
+ * - Actively using an mm. Our CPU's bit will be set in
+ * mm_cpumask(loaded_mm) and is_lazy == false;
+ *
+ * - Not using a real mm. loaded_mm == &init_mm. Our CPU's bit
+ * will not be set in mm_cpumask(&init_mm) and is_lazy == false.
+ *
+ * - Lazily using a real mm. loaded_mm != &init_mm, our bit
+ * is set in mm_cpumask(loaded_mm), but is_lazy == true.
+ * We're heuristically guessing that the CR3 load we
+ * skipped more than makes up for the overhead added by
+ * lazy mode.
+ */
+ bool is_lazy;
+
+ /*
* Access to this CR4 shadow and to H/W CR4 is protected by
* disabling interrupts when modifying either one.
*/
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index d705c769f77d..ff891772c9f8 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -573,11 +573,21 @@ static u32 bdx_deadline_rev(void)
return ~0U;
}
+static u32 skx_deadline_rev(void)
+{
+ switch (boot_cpu_data.x86_mask) {
+ case 0x03: return 0x01000136;
+ case 0x04: return 0x02000014;
+ }
+
+ return ~0U;
+}
+
static const struct x86_cpu_id deadline_match[] = {
DEADLINE_MODEL_MATCH_FUNC( INTEL_FAM6_HASWELL_X, hsx_deadline_rev),
DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_BROADWELL_X, 0x0b000020),
DEADLINE_MODEL_MATCH_FUNC( INTEL_FAM6_BROADWELL_XEON_D, bdx_deadline_rev),
- DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_SKYLAKE_X, 0x02000014),
+ DEADLINE_MODEL_MATCH_FUNC( INTEL_FAM6_SKYLAKE_X, skx_deadline_rev),
DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_HASWELL_CORE, 0x22),
DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_HASWELL_ULT, 0x20),
@@ -600,7 +610,8 @@ static void apic_check_deadline_errata(void)
const struct x86_cpu_id *m;
u32 rev;
- if (!boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER))
+ if (!boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER) ||
+ boot_cpu_has(X86_FEATURE_HYPERVISOR))
return;
m = x86_match_cpu(deadline_match);
diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c
index 86e8f0b2537b..c4fa4a85d4cb 100644
--- a/arch/x86/kernel/cpu/microcode/core.c
+++ b/arch/x86/kernel/cpu/microcode/core.c
@@ -122,9 +122,6 @@ static bool __init check_loader_disabled_bsp(void)
bool *res = &dis_ucode_ldr;
#endif
- if (!have_cpuid_p())
- return *res;
-
/*
* CPUID(1).ECX[31]: reserved for hypervisor use. This is still not
* completely accurate as xen pv guests don't see that CPUID bit set but
@@ -166,24 +163,36 @@ bool get_builtin_firmware(struct cpio_data *cd, const char *name)
void __init load_ucode_bsp(void)
{
unsigned int cpuid_1_eax;
+ bool intel = true;
- if (check_loader_disabled_bsp())
+ if (!have_cpuid_p())
return;
cpuid_1_eax = native_cpuid_eax(1);
switch (x86_cpuid_vendor()) {
case X86_VENDOR_INTEL:
- if (x86_family(cpuid_1_eax) >= 6)
- load_ucode_intel_bsp();
+ if (x86_family(cpuid_1_eax) < 6)
+ return;
break;
+
case X86_VENDOR_AMD:
- if (x86_family(cpuid_1_eax) >= 0x10)
- load_ucode_amd_bsp(cpuid_1_eax);
+ if (x86_family(cpuid_1_eax) < 0x10)
+ return;
+ intel = false;
break;
+
default:
- break;
+ return;
}
+
+ if (check_loader_disabled_bsp())
+ return;
+
+ if (intel)
+ load_ucode_intel_bsp();
+ else
+ load_ucode_amd_bsp(cpuid_1_eax);
}
static bool check_loader_disabled_ap(void)
diff --git a/arch/x86/kernel/kprobes/common.h b/arch/x86/kernel/kprobes/common.h
index db2182d63ed0..3fc0f9a794cb 100644
--- a/arch/x86/kernel/kprobes/common.h
+++ b/arch/x86/kernel/kprobes/common.h
@@ -3,6 +3,15 @@
/* Kprobes and Optprobes common header */
+#include <asm/asm.h>
+
+#ifdef CONFIG_FRAME_POINTER
+# define SAVE_RBP_STRING " push %" _ASM_BP "\n" \
+ " mov %" _ASM_SP ", %" _ASM_BP "\n"
+#else
+# define SAVE_RBP_STRING " push %" _ASM_BP "\n"
+#endif
+
#ifdef CONFIG_X86_64
#define SAVE_REGS_STRING \
/* Skip cs, ip, orig_ax. */ \
@@ -17,7 +26,7 @@
" pushq %r10\n" \
" pushq %r11\n" \
" pushq %rbx\n" \
- " pushq %rbp\n" \
+ SAVE_RBP_STRING \
" pushq %r12\n" \
" pushq %r13\n" \
" pushq %r14\n" \
@@ -48,7 +57,7 @@
" pushl %es\n" \
" pushl %ds\n" \
" pushl %eax\n" \
- " pushl %ebp\n" \
+ SAVE_RBP_STRING \
" pushl %edi\n" \
" pushl %esi\n" \
" pushl %edx\n" \
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
index f0153714ddac..0742491cbb73 100644
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -1080,8 +1080,6 @@ int setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
* raw stack chunk with redzones:
*/
__memcpy(kcb->jprobes_stack, (kprobe_opcode_t *)addr, MIN_STACK_SIZE(addr));
- regs->flags &= ~X86_EFLAGS_IF;
- trace_hardirqs_off();
regs->ip = (unsigned long)(jp->entry);
/*
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 54180fa6f66f..add33f600531 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -105,6 +105,10 @@ void __noreturn machine_real_restart(unsigned int type)
load_cr3(initial_page_table);
#else
write_cr3(real_mode_header->trampoline_pgd);
+
+ /* Exiting long mode will fail if CR4.PCIDE is set. */
+ if (static_cpu_has(X86_FEATURE_PCID))
+ cr4_clear_bits(X86_CR4_PCIDE);
#endif
/* Jump to the identity-mapped low memory code */
diff --git a/arch/x86/kernel/unwind_frame.c b/arch/x86/kernel/unwind_frame.c
index d145a0b1f529..3dc26f95d46e 100644
--- a/arch/x86/kernel/unwind_frame.c
+++ b/arch/x86/kernel/unwind_frame.c
@@ -44,7 +44,8 @@ static void unwind_dump(struct unwind_state *state)
state->stack_info.type, state->stack_info.next_sp,
state->stack_mask, state->graph_idx);
- for (sp = state->orig_sp; sp; sp = PTR_ALIGN(stack_info.next_sp, sizeof(long))) {
+ for (sp = PTR_ALIGN(state->orig_sp, sizeof(long)); sp;
+ sp = PTR_ALIGN(stack_info.next_sp, sizeof(long))) {
if (get_stack_info(sp, state->task, &stack_info, &visit_mask))
break;
@@ -174,6 +175,7 @@ static bool is_last_task_frame(struct unwind_state *state)
* This determines if the frame pointer actually contains an encoded pointer to
* pt_regs on the stack. See ENCODE_FRAME_POINTER.
*/
+#ifdef CONFIG_X86_64
static struct pt_regs *decode_frame_pointer(unsigned long *bp)
{
unsigned long regs = (unsigned long)bp;
@@ -183,6 +185,23 @@ static struct pt_regs *decode_frame_pointer(unsigned long *bp)
return (struct pt_regs *)(regs & ~0x1);
}
+#else
+static struct pt_regs *decode_frame_pointer(unsigned long *bp)
+{
+ unsigned long regs = (unsigned long)bp;
+
+ if (regs & 0x80000000)
+ return NULL;
+
+ return (struct pt_regs *)(regs | 0x80000000);
+}
+#endif
+
+#ifdef CONFIG_X86_32
+#define KERNEL_REGS_SIZE (sizeof(struct pt_regs) - 2*sizeof(long))
+#else
+#define KERNEL_REGS_SIZE (sizeof(struct pt_regs))
+#endif
static bool update_stack_state(struct unwind_state *state,
unsigned long *next_bp)
@@ -202,7 +221,7 @@ static bool update_stack_state(struct unwind_state *state,
regs = decode_frame_pointer(next_bp);
if (regs) {
frame = (unsigned long *)regs;
- len = regs_size(regs);
+ len = KERNEL_REGS_SIZE;
state->got_irq = true;
} else {
frame = next_bp;
@@ -226,6 +245,14 @@ static bool update_stack_state(struct unwind_state *state,
frame < prev_frame_end)
return false;
+ /*
+ * On 32-bit with user mode regs, make sure the last two regs are safe
+ * to access:
+ */
+ if (IS_ENABLED(CONFIG_X86_32) && regs && user_mode(regs) &&
+ !on_stack(info, frame, len + 2*sizeof(long)))
+ return false;
+
/* Move state to the next frame: */
if (regs) {
state->regs = regs;
@@ -328,6 +355,13 @@ bool unwind_next_frame(struct unwind_state *state)
state->regs->sp < (unsigned long)task_pt_regs(state->task))
goto the_end;
+ /*
+ * There are some known frame pointer issues on 32-bit. Disable
+ * unwinder warnings on 32-bit until it gets objtool support.
+ */
+ if (IS_ENABLED(CONFIG_X86_32))
+ goto the_end;
+
if (state->regs) {
printk_deferred_once(KERN_WARNING
"WARNING: kernel stack regs at %p in %s:%d has bad 'bp' value %p\n",
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index 72bf8c01c6e3..e1f095884386 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -1,5 +1,12 @@
-# Kernel does not boot with instrumentation of tlb.c.
-KCOV_INSTRUMENT_tlb.o := n
+# Kernel does not boot with instrumentation of tlb.c and mem_encrypt.c
+KCOV_INSTRUMENT_tlb.o := n
+KCOV_INSTRUMENT_mem_encrypt.o := n
+
+KASAN_SANITIZE_mem_encrypt.o := n
+
+ifdef CONFIG_FUNCTION_TRACER
+CFLAGS_REMOVE_mem_encrypt.o = -pg
+endif
obj-y := init.o init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \
pat.o pgtable.o physaddr.o setup_nx.o tlb.o
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 49d9778376d7..658bf0090565 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -30,6 +30,8 @@
atomic64_t last_mm_ctx_id = ATOMIC64_INIT(1);
+DEFINE_STATIC_KEY_TRUE(tlb_use_lazy_mode);
+
static void choose_new_asid(struct mm_struct *next, u64 next_tlb_gen,
u16 *new_asid, bool *need_flush)
{
@@ -80,7 +82,7 @@ void leave_mm(int cpu)
return;
/* Warn if we're not lazy. */
- WARN_ON(cpumask_test_cpu(smp_processor_id(), mm_cpumask(loaded_mm)));
+ WARN_ON(!this_cpu_read(cpu_tlbstate.is_lazy));
switch_mm(NULL, &init_mm, NULL);
}
@@ -142,45 +144,24 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
__flush_tlb_all();
}
#endif
+ this_cpu_write(cpu_tlbstate.is_lazy, false);
if (real_prev == next) {
VM_BUG_ON(this_cpu_read(cpu_tlbstate.ctxs[prev_asid].ctx_id) !=
next->context.ctx_id);
- if (cpumask_test_cpu(cpu, mm_cpumask(next))) {
- /*
- * There's nothing to do: we weren't lazy, and we
- * aren't changing our mm. We don't need to flush
- * anything, nor do we need to update CR3, CR4, or
- * LDTR.
- */
- return;
- }
-
- /* Resume remote flushes and then read tlb_gen. */
- cpumask_set_cpu(cpu, mm_cpumask(next));
- next_tlb_gen = atomic64_read(&next->context.tlb_gen);
-
- if (this_cpu_read(cpu_tlbstate.ctxs[prev_asid].tlb_gen) <
- next_tlb_gen) {
- /*
- * Ideally, we'd have a flush_tlb() variant that
- * takes the known CR3 value as input. This would
- * be faster on Xen PV and on hypothetical CPUs
- * on which INVPCID is fast.
- */
- this_cpu_write(cpu_tlbstate.ctxs[prev_asid].tlb_gen,
- next_tlb_gen);
- write_cr3(build_cr3(next, prev_asid));
- trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH,
- TLB_FLUSH_ALL);
- }
-
/*
- * We just exited lazy mode, which means that CR4 and/or LDTR
- * may be stale. (Changes to the required CR4 and LDTR states
- * are not reflected in tlb_gen.)
+ * We don't currently support having a real mm loaded without
+ * our cpu set in mm_cpumask(). We have all the bookkeeping
+ * in place to figure out whether we would need to flush
+ * if our cpu were cleared in mm_cpumask(), but we don't
+ * currently use it.
*/
+ if (WARN_ON_ONCE(real_prev != &init_mm &&
+ !cpumask_test_cpu(cpu, mm_cpumask(next))))
+ cpumask_set_cpu(cpu, mm_cpumask(next));
+
+ return;
} else {
u16 new_asid;
bool need_flush;
@@ -199,10 +180,9 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
}
/* Stop remote flushes for the previous mm */
- if (cpumask_test_cpu(cpu, mm_cpumask(real_prev)))
- cpumask_clear_cpu(cpu, mm_cpumask(real_prev));
-
- VM_WARN_ON_ONCE(cpumask_test_cpu(cpu, mm_cpumask(next)));
+ VM_WARN_ON_ONCE(!cpumask_test_cpu(cpu, mm_cpumask(real_prev)) &&
+ real_prev != &init_mm);
+ cpumask_clear_cpu(cpu, mm_cpumask(real_prev));
/*
* Start remote flushes and then read tlb_gen.
@@ -233,6 +213,37 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
}
/*
+ * enter_lazy_tlb() is a hint from the scheduler that we are entering a
+ * kernel thread or other context without an mm. Acceptable implementations
+ * include doing nothing whatsoever, switching to init_mm, or various clever
+ * lazy tricks to try to minimize TLB flushes.
+ *
+ * The scheduler reserves the right to call enter_lazy_tlb() several times
+ * in a row. It will notify us that we're going back to a real mm by
+ * calling switch_mm_irqs_off().
+ */
+void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
+{
+ if (this_cpu_read(cpu_tlbstate.loaded_mm) == &init_mm)
+ return;
+
+ if (static_branch_unlikely(&tlb_use_lazy_mode)) {
+ /*
+ * There's a significant optimization that may be possible
+ * here. We have accurate enough TLB flush tracking that we
+ * don't need to maintain coherence of TLB per se when we're
+ * lazy. We do, however, need to maintain coherence of
+ * paging-structure caches. We could, in principle, leave our
+ * old mm loaded and only switch to init_mm when
+ * tlb_remove_page() happens.
+ */
+ this_cpu_write(cpu_tlbstate.is_lazy, true);
+ } else {
+ switch_mm(NULL, &init_mm, NULL);
+ }
+}
+
+/*
* Call this when reinitializing a CPU. It fixes the following potential
* problems:
*
@@ -303,16 +314,20 @@ static void flush_tlb_func_common(const struct flush_tlb_info *f,
/* This code cannot presently handle being reentered. */
VM_WARN_ON(!irqs_disabled());
+ if (unlikely(loaded_mm == &init_mm))
+ return;
+
VM_WARN_ON(this_cpu_read(cpu_tlbstate.ctxs[loaded_mm_asid].ctx_id) !=
loaded_mm->context.ctx_id);
- if (!cpumask_test_cpu(smp_processor_id(), mm_cpumask(loaded_mm))) {
+ if (this_cpu_read(cpu_tlbstate.is_lazy)) {
/*
- * We're in lazy mode -- don't flush. We can get here on
- * remote flushes due to races and on local flushes if a
- * kernel thread coincidentally flushes the mm it's lazily
- * still using.
+ * We're in lazy mode. We need to at least flush our
+ * paging-structure cache to avoid speculatively reading
+ * garbage into our TLB. Since switching to init_mm is barely
+ * slower than a minimal flush, just switch to init_mm.
*/
+ switch_mm_irqs_off(NULL, &init_mm, NULL);
return;
}
@@ -611,3 +626,57 @@ static int __init create_tlb_single_page_flush_ceiling(void)
return 0;
}
late_initcall(create_tlb_single_page_flush_ceiling);
+
+static ssize_t tlblazy_read_file(struct file *file, char __user *user_buf,
+ size_t count, loff_t *ppos)
+{
+ char buf[2];
+
+ buf[0] = static_branch_likely(&tlb_use_lazy_mode) ? '1' : '0';
+ buf[1] = '\n';
+
+ return simple_read_from_buffer(user_buf, count, ppos, buf, 2);
+}
+
+static ssize_t tlblazy_write_file(struct file *file,
+ const char __user *user_buf, size_t count, loff_t *ppos)
+{
+ bool val;
+
+ if (kstrtobool_from_user(user_buf, count, &val))
+ return -EINVAL;
+
+ if (val)
+ static_branch_enable(&tlb_use_lazy_mode);
+ else
+ static_branch_disable(&tlb_use_lazy_mode);
+
+ return count;
+}
+
+static const struct file_operations fops_tlblazy = {
+ .read = tlblazy_read_file,
+ .write = tlblazy_write_file,
+ .llseek = default_llseek,
+};
+
+static int __init init_tlb_use_lazy_mode(void)
+{
+ if (boot_cpu_has(X86_FEATURE_PCID)) {
+ /*
+ * Heuristic: with PCID on, switching to and from
+ * init_mm is reasonably fast, but remote flush IPIs
+ * as expensive as ever, so turn off lazy TLB mode.
+ *
+ * We can't do this in setup_pcid() because static keys
+ * haven't been initialized yet, and it would blow up
+ * badly.
+ */
+ static_branch_disable(&tlb_use_lazy_mode);
+ }
+
+ debugfs_create_file("tlb_use_lazy_mode", S_IRUSR | S_IWUSR,
+ arch_debugfs_dir, NULL, &fops_tlblazy);
+ return 0;
+}
+late_initcall(init_tlb_use_lazy_mode);
^ permalink raw reply related [flat|nested] 547+ messages in thread
* [GIT PULL] x86 fixes
@ 2017-09-24 11:28 Ingo Molnar
0 siblings, 0 replies; 547+ messages in thread
From: Ingo Molnar @ 2017-09-24 11:28 UTC (permalink / raw)
To: Linus Torvalds
Cc: linux-kernel, Thomas Gleixner, H. Peter Anvin, Peter Zijlstra,
Andrew Morton, Andy Lutomirski
Linus,
Please pull the latest x86-urgent-for-linus git tree from:
git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus
# HEAD: f5caf621ee357279e759c0911daf6d55c7d36f03 x86/asm: Fix inline asm call constraints for Clang
Another round of CR3/PCID related fixes (I think this addresses all but one of the
known problems with PCID support), an objtool fix plus a Clang fix that (finally)
solves all Clang quirks to build a bootable x86 kernel as-is.
out-of-topic modifications in x86-urgent-for-linus:
-----------------------------------------------------
tools/objtool/check.c # 0d0970eef3b0: objtool: Handle another GCC
Thanks,
Ingo
------------------>
Andy Lutomirski (4):
x86/mm: Factor out CR3-building code
x86/mm/64: Stop using CR3.PCID == 0 in ASID-aware code
x86/mm/32: Move setup_clear_cpu_cap(X86_FEATURE_PCID) earlier
x86/mm/32: Load a sane CR3 before cpu_init() on secondary CPUs
Josh Poimboeuf (2):
objtool: Handle another GCC stack pointer adjustment bug
x86/asm: Fix inline asm call constraints for Clang
arch/x86/include/asm/alternative.h | 3 +-
arch/x86/include/asm/asm.h | 11 ++++++
arch/x86/include/asm/mmu_context.h | 32 +++++++++++++++---
arch/x86/include/asm/mshyperv.h | 10 +++---
arch/x86/include/asm/paravirt_types.h | 14 ++++----
arch/x86/include/asm/preempt.h | 15 +++------
arch/x86/include/asm/processor.h | 6 ++--
arch/x86/include/asm/rwsem.h | 4 +--
arch/x86/include/asm/uaccess.h | 4 +--
arch/x86/include/asm/xen/hypercall.h | 5 ++-
arch/x86/kernel/cpu/bugs.c | 8 -----
arch/x86/kernel/cpu/common.c | 8 +++++
arch/x86/kernel/smpboot.c | 13 +++----
arch/x86/kvm/emulate.c | 3 +-
arch/x86/kvm/vmx.c | 3 +-
arch/x86/mm/fault.c | 3 +-
arch/x86/mm/tlb.c | 11 +++---
tools/objtool/Documentation/stack-validation.txt | 6 ++--
tools/objtool/arch/x86/decode.c | 6 ++--
tools/objtool/check.c | 43 ++++++++++++++++--------
20 files changed, 122 insertions(+), 86 deletions(-)
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index 1b020381ab38..c096624137ae 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -218,10 +218,9 @@ static inline int alternatives_text_reserved(void *start, void *end)
#define alternative_call_2(oldfunc, newfunc1, feature1, newfunc2, feature2, \
output, input...) \
{ \
- register void *__sp asm(_ASM_SP); \
asm volatile (ALTERNATIVE_2("call %P[old]", "call %P[new1]", feature1,\
"call %P[new2]", feature2) \
- : output, "+r" (__sp) \
+ : output, ASM_CALL_CONSTRAINT \
: [old] "i" (oldfunc), [new1] "i" (newfunc1), \
[new2] "i" (newfunc2), ## input); \
}
diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h
index 676ee5807d86..c1eadbaf1115 100644
--- a/arch/x86/include/asm/asm.h
+++ b/arch/x86/include/asm/asm.h
@@ -132,4 +132,15 @@
/* For C file, we already have NOKPROBE_SYMBOL macro */
#endif
+#ifndef __ASSEMBLY__
+/*
+ * This output constraint should be used for any inline asm which has a "call"
+ * instruction. Otherwise the asm may be inserted before the frame pointer
+ * gets set up by the containing function. If you forget to do this, objtool
+ * may print a "call without frame pointer save/setup" warning.
+ */
+register unsigned int __asm_call_sp asm("esp");
+#define ASM_CALL_CONSTRAINT "+r" (__asm_call_sp)
+#endif
+
#endif /* _ASM_X86_ASM_H */
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index 7ae318c340d9..c120b5db178a 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -286,6 +286,32 @@ static inline bool arch_vma_access_permitted(struct vm_area_struct *vma,
return __pkru_allows_pkey(vma_pkey(vma), write);
}
+/*
+ * If PCID is on, ASID-aware code paths put the ASID+1 into the PCID
+ * bits. This serves two purposes. It prevents a nasty situation in
+ * which PCID-unaware code saves CR3, loads some other value (with PCID
+ * == 0), and then restores CR3, thus corrupting the TLB for ASID 0 if
+ * the saved ASID was nonzero. It also means that any bugs involving
+ * loading a PCID-enabled CR3 with CR4.PCIDE off will trigger
+ * deterministically.
+ */
+
+static inline unsigned long build_cr3(struct mm_struct *mm, u16 asid)
+{
+ if (static_cpu_has(X86_FEATURE_PCID)) {
+ VM_WARN_ON_ONCE(asid > 4094);
+ return __sme_pa(mm->pgd) | (asid + 1);
+ } else {
+ VM_WARN_ON_ONCE(asid != 0);
+ return __sme_pa(mm->pgd);
+ }
+}
+
+static inline unsigned long build_cr3_noflush(struct mm_struct *mm, u16 asid)
+{
+ VM_WARN_ON_ONCE(asid > 4094);
+ return __sme_pa(mm->pgd) | (asid + 1) | CR3_NOFLUSH;
+}
/*
* This can be used from process context to figure out what the value of
@@ -296,10 +322,8 @@ static inline bool arch_vma_access_permitted(struct vm_area_struct *vma,
*/
static inline unsigned long __get_current_cr3_fast(void)
{
- unsigned long cr3 = __pa(this_cpu_read(cpu_tlbstate.loaded_mm)->pgd);
-
- if (static_cpu_has(X86_FEATURE_PCID))
- cr3 |= this_cpu_read(cpu_tlbstate.loaded_mm_asid);
+ unsigned long cr3 = build_cr3(this_cpu_read(cpu_tlbstate.loaded_mm),
+ this_cpu_read(cpu_tlbstate.loaded_mm_asid));
/* For now, be very restrictive about when this can be called. */
VM_WARN_ON(in_nmi() || preemptible());
diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
index 63cc96f064dc..738503e1f80c 100644
--- a/arch/x86/include/asm/mshyperv.h
+++ b/arch/x86/include/asm/mshyperv.h
@@ -179,7 +179,6 @@ static inline u64 hv_do_hypercall(u64 control, void *input, void *output)
u64 input_address = input ? virt_to_phys(input) : 0;
u64 output_address = output ? virt_to_phys(output) : 0;
u64 hv_status;
- register void *__sp asm(_ASM_SP);
#ifdef CONFIG_X86_64
if (!hv_hypercall_pg)
@@ -187,7 +186,7 @@ static inline u64 hv_do_hypercall(u64 control, void *input, void *output)
__asm__ __volatile__("mov %4, %%r8\n"
"call *%5"
- : "=a" (hv_status), "+r" (__sp),
+ : "=a" (hv_status), ASM_CALL_CONSTRAINT,
"+c" (control), "+d" (input_address)
: "r" (output_address), "m" (hv_hypercall_pg)
: "cc", "memory", "r8", "r9", "r10", "r11");
@@ -202,7 +201,7 @@ static inline u64 hv_do_hypercall(u64 control, void *input, void *output)
__asm__ __volatile__("call *%7"
: "=A" (hv_status),
- "+c" (input_address_lo), "+r" (__sp)
+ "+c" (input_address_lo), ASM_CALL_CONSTRAINT
: "A" (control),
"b" (input_address_hi),
"D"(output_address_hi), "S"(output_address_lo),
@@ -224,12 +223,11 @@ static inline u64 hv_do_hypercall(u64 control, void *input, void *output)
static inline u64 hv_do_fast_hypercall8(u16 code, u64 input1)
{
u64 hv_status, control = (u64)code | HV_HYPERCALL_FAST_BIT;
- register void *__sp asm(_ASM_SP);
#ifdef CONFIG_X86_64
{
__asm__ __volatile__("call *%4"
- : "=a" (hv_status), "+r" (__sp),
+ : "=a" (hv_status), ASM_CALL_CONSTRAINT,
"+c" (control), "+d" (input1)
: "m" (hv_hypercall_pg)
: "cc", "r8", "r9", "r10", "r11");
@@ -242,7 +240,7 @@ static inline u64 hv_do_fast_hypercall8(u16 code, u64 input1)
__asm__ __volatile__ ("call *%5"
: "=A"(hv_status),
"+c"(input1_lo),
- "+r"(__sp)
+ ASM_CALL_CONSTRAINT
: "A" (control),
"b" (input1_hi),
"m" (hv_hypercall_pg)
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index 42873edd9f9d..280d94c36dad 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -459,8 +459,8 @@ int paravirt_disable_iospace(void);
*/
#ifdef CONFIG_X86_32
#define PVOP_VCALL_ARGS \
- unsigned long __eax = __eax, __edx = __edx, __ecx = __ecx; \
- register void *__sp asm("esp")
+ unsigned long __eax = __eax, __edx = __edx, __ecx = __ecx;
+
#define PVOP_CALL_ARGS PVOP_VCALL_ARGS
#define PVOP_CALL_ARG1(x) "a" ((unsigned long)(x))
@@ -480,8 +480,8 @@ int paravirt_disable_iospace(void);
/* [re]ax isn't an arg, but the return val */
#define PVOP_VCALL_ARGS \
unsigned long __edi = __edi, __esi = __esi, \
- __edx = __edx, __ecx = __ecx, __eax = __eax; \
- register void *__sp asm("rsp")
+ __edx = __edx, __ecx = __ecx, __eax = __eax;
+
#define PVOP_CALL_ARGS PVOP_VCALL_ARGS
#define PVOP_CALL_ARG1(x) "D" ((unsigned long)(x))
@@ -532,7 +532,7 @@ int paravirt_disable_iospace(void);
asm volatile(pre \
paravirt_alt(PARAVIRT_CALL) \
post \
- : call_clbr, "+r" (__sp) \
+ : call_clbr, ASM_CALL_CONSTRAINT \
: paravirt_type(op), \
paravirt_clobber(clbr), \
##__VA_ARGS__ \
@@ -542,7 +542,7 @@ int paravirt_disable_iospace(void);
asm volatile(pre \
paravirt_alt(PARAVIRT_CALL) \
post \
- : call_clbr, "+r" (__sp) \
+ : call_clbr, ASM_CALL_CONSTRAINT \
: paravirt_type(op), \
paravirt_clobber(clbr), \
##__VA_ARGS__ \
@@ -569,7 +569,7 @@ int paravirt_disable_iospace(void);
asm volatile(pre \
paravirt_alt(PARAVIRT_CALL) \
post \
- : call_clbr, "+r" (__sp) \
+ : call_clbr, ASM_CALL_CONSTRAINT \
: paravirt_type(op), \
paravirt_clobber(clbr), \
##__VA_ARGS__ \
diff --git a/arch/x86/include/asm/preempt.h b/arch/x86/include/asm/preempt.h
index ec1f3c651150..4f44505dbf87 100644
--- a/arch/x86/include/asm/preempt.h
+++ b/arch/x86/include/asm/preempt.h
@@ -100,19 +100,14 @@ static __always_inline bool should_resched(int preempt_offset)
#ifdef CONFIG_PREEMPT
extern asmlinkage void ___preempt_schedule(void);
-# define __preempt_schedule() \
-({ \
- register void *__sp asm(_ASM_SP); \
- asm volatile ("call ___preempt_schedule" : "+r"(__sp)); \
-})
+# define __preempt_schedule() \
+ asm volatile ("call ___preempt_schedule" : ASM_CALL_CONSTRAINT)
extern asmlinkage void preempt_schedule(void);
extern asmlinkage void ___preempt_schedule_notrace(void);
-# define __preempt_schedule_notrace() \
-({ \
- register void *__sp asm(_ASM_SP); \
- asm volatile ("call ___preempt_schedule_notrace" : "+r"(__sp)); \
-})
+# define __preempt_schedule_notrace() \
+ asm volatile ("call ___preempt_schedule_notrace" : ASM_CALL_CONSTRAINT)
+
extern asmlinkage void preempt_schedule_notrace(void);
#endif
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 3fa26a61eabc..b390ff76e58f 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -677,8 +677,6 @@ static inline void sync_core(void)
* Like all of Linux's memory ordering operations, this is a
* compiler barrier as well.
*/
- register void *__sp asm(_ASM_SP);
-
#ifdef CONFIG_X86_32
asm volatile (
"pushfl\n\t"
@@ -686,7 +684,7 @@ static inline void sync_core(void)
"pushl $1f\n\t"
"iret\n\t"
"1:"
- : "+r" (__sp) : : "memory");
+ : ASM_CALL_CONSTRAINT : : "memory");
#else
unsigned int tmp;
@@ -703,7 +701,7 @@ static inline void sync_core(void)
"iretq\n\t"
UNWIND_HINT_RESTORE
"1:"
- : "=&r" (tmp), "+r" (__sp) : : "cc", "memory");
+ : "=&r" (tmp), ASM_CALL_CONSTRAINT : : "cc", "memory");
#endif
}
diff --git a/arch/x86/include/asm/rwsem.h b/arch/x86/include/asm/rwsem.h
index a34e0d4b957d..7116b7931c7b 100644
--- a/arch/x86/include/asm/rwsem.h
+++ b/arch/x86/include/asm/rwsem.h
@@ -103,7 +103,6 @@ static inline bool __down_read_trylock(struct rw_semaphore *sem)
({ \
long tmp; \
struct rw_semaphore* ret; \
- register void *__sp asm(_ASM_SP); \
\
asm volatile("# beginning down_write\n\t" \
LOCK_PREFIX " xadd %1,(%4)\n\t" \
@@ -114,7 +113,8 @@ static inline bool __down_read_trylock(struct rw_semaphore *sem)
" call " slow_path "\n" \
"1:\n" \
"# ending down_write" \
- : "+m" (sem->count), "=d" (tmp), "=a" (ret), "+r" (__sp) \
+ : "+m" (sem->count), "=d" (tmp), \
+ "=a" (ret), ASM_CALL_CONSTRAINT \
: "a" (sem), "1" (RWSEM_ACTIVE_WRITE_BIAS) \
: "memory", "cc"); \
ret; \
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index 184eb9894dae..78e8fcc87d4c 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -166,11 +166,11 @@ __typeof__(__builtin_choose_expr(sizeof(x) > sizeof(0UL), 0ULL, 0UL))
({ \
int __ret_gu; \
register __inttype(*(ptr)) __val_gu asm("%"_ASM_DX); \
- register void *__sp asm(_ASM_SP); \
__chk_user_ptr(ptr); \
might_fault(); \
asm volatile("call __get_user_%P4" \
- : "=a" (__ret_gu), "=r" (__val_gu), "+r" (__sp) \
+ : "=a" (__ret_gu), "=r" (__val_gu), \
+ ASM_CALL_CONSTRAINT \
: "0" (ptr), "i" (sizeof(*(ptr)))); \
(x) = (__force __typeof__(*(ptr))) __val_gu; \
__builtin_expect(__ret_gu, 0); \
diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h
index 9606688caa4b..128a1a0b1450 100644
--- a/arch/x86/include/asm/xen/hypercall.h
+++ b/arch/x86/include/asm/xen/hypercall.h
@@ -113,10 +113,9 @@ extern struct { char _entry[32]; } hypercall_page[];
register unsigned long __arg2 asm(__HYPERCALL_ARG2REG) = __arg2; \
register unsigned long __arg3 asm(__HYPERCALL_ARG3REG) = __arg3; \
register unsigned long __arg4 asm(__HYPERCALL_ARG4REG) = __arg4; \
- register unsigned long __arg5 asm(__HYPERCALL_ARG5REG) = __arg5; \
- register void *__sp asm(_ASM_SP);
+ register unsigned long __arg5 asm(__HYPERCALL_ARG5REG) = __arg5;
-#define __HYPERCALL_0PARAM "=r" (__res), "+r" (__sp)
+#define __HYPERCALL_0PARAM "=r" (__res), ASM_CALL_CONSTRAINT
#define __HYPERCALL_1PARAM __HYPERCALL_0PARAM, "+r" (__arg1)
#define __HYPERCALL_2PARAM __HYPERCALL_1PARAM, "+r" (__arg2)
#define __HYPERCALL_3PARAM __HYPERCALL_2PARAM, "+r" (__arg3)
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index db684880d74a..0af86d9242da 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -21,14 +21,6 @@
void __init check_bugs(void)
{
-#ifdef CONFIG_X86_32
- /*
- * Regardless of whether PCID is enumerated, the SDM says
- * that it can't be enabled in 32-bit mode.
- */
- setup_clear_cpu_cap(X86_FEATURE_PCID);
-#endif
-
identify_boot_cpu();
if (!IS_ENABLED(CONFIG_SMP)) {
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 775f10100d7f..c9176bae7fd8 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -904,6 +904,14 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
setup_force_cpu_cap(X86_FEATURE_ALWAYS);
fpu__init_system(c);
+
+#ifdef CONFIG_X86_32
+ /*
+ * Regardless of whether PCID is enumerated, the SDM says
+ * that it can't be enabled in 32-bit mode.
+ */
+ setup_clear_cpu_cap(X86_FEATURE_PCID);
+#endif
}
void __init early_cpu_init(void)
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 0854ff169274..ad59edd84de7 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -232,12 +232,6 @@ static void notrace start_secondary(void *unused)
*/
if (boot_cpu_has(X86_FEATURE_PCID))
__write_cr4(__read_cr4() | X86_CR4_PCIDE);
- cpu_init();
- x86_cpuinit.early_percpu_clock_init();
- preempt_disable();
- smp_callin();
-
- enable_start_cpu0 = 0;
#ifdef CONFIG_X86_32
/* switch away from the initial page table */
@@ -245,6 +239,13 @@ static void notrace start_secondary(void *unused)
__flush_tlb_all();
#endif
+ cpu_init();
+ x86_cpuinit.early_percpu_clock_init();
+ preempt_disable();
+ smp_callin();
+
+ enable_start_cpu0 = 0;
+
/* otherwise gcc will move up smp_processor_id before the cpu_init */
barrier();
/*
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 16bf6655aa85..f23f13403f33 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -5296,7 +5296,6 @@ static void fetch_possible_mmx_operand(struct x86_emulate_ctxt *ctxt,
static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *))
{
- register void *__sp asm(_ASM_SP);
ulong flags = (ctxt->eflags & EFLAGS_MASK) | X86_EFLAGS_IF;
if (!(ctxt->d & ByteOp))
@@ -5304,7 +5303,7 @@ static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *))
asm("push %[flags]; popf; call *%[fastop]; pushf; pop %[flags]\n"
: "+a"(ctxt->dst.val), "+d"(ctxt->src.val), [flags]"+D"(flags),
- [fastop]"+S"(fop), "+r"(__sp)
+ [fastop]"+S"(fop), ASM_CALL_CONSTRAINT
: "c"(ctxt->src2.val));
ctxt->eflags = (ctxt->eflags & ~EFLAGS_MASK) | (flags & EFLAGS_MASK);
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 06c0c6d0541e..6ee237f509dc 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -9036,7 +9036,6 @@ static void vmx_complete_atomic_exit(struct vcpu_vmx *vmx)
static void vmx_handle_external_intr(struct kvm_vcpu *vcpu)
{
u32 exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
- register void *__sp asm(_ASM_SP);
if ((exit_intr_info & (INTR_INFO_VALID_MASK | INTR_INFO_INTR_TYPE_MASK))
== (INTR_INFO_VALID_MASK | INTR_TYPE_EXT_INTR)) {
@@ -9065,7 +9064,7 @@ static void vmx_handle_external_intr(struct kvm_vcpu *vcpu)
#ifdef CONFIG_X86_64
[sp]"=&r"(tmp),
#endif
- "+r"(__sp)
+ ASM_CALL_CONSTRAINT
:
[entry]"r"(entry),
[ss]"i"(__KERNEL_DS),
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index b836a7274e12..39567b5c33da 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -806,7 +806,6 @@ no_context(struct pt_regs *regs, unsigned long error_code,
if (is_vmalloc_addr((void *)address) &&
(((unsigned long)tsk->stack - 1 - address < PAGE_SIZE) ||
address - ((unsigned long)tsk->stack + THREAD_SIZE) < PAGE_SIZE)) {
- register void *__sp asm("rsp");
unsigned long stack = this_cpu_read(orig_ist.ist[DOUBLEFAULT_STACK]) - sizeof(void *);
/*
* We're likely to be running with very little stack space
@@ -821,7 +820,7 @@ no_context(struct pt_regs *regs, unsigned long error_code,
asm volatile ("movq %[stack], %%rsp\n\t"
"call handle_stack_overflow\n\t"
"1: jmp 1b"
- : "+r" (__sp)
+ : ASM_CALL_CONSTRAINT
: "D" ("kernel stack overflow (page fault)"),
"S" (regs), "d" (address),
[stack] "rm" (stack));
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 1ab3821f9e26..93fe97cce581 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -126,8 +126,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
* isn't free.
*/
#ifdef CONFIG_DEBUG_VM
- if (WARN_ON_ONCE(__read_cr3() !=
- (__sme_pa(real_prev->pgd) | prev_asid))) {
+ if (WARN_ON_ONCE(__read_cr3() != build_cr3(real_prev, prev_asid))) {
/*
* If we were to BUG here, we'd be very likely to kill
* the system so hard that we don't see the call trace.
@@ -172,7 +171,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
*/
this_cpu_write(cpu_tlbstate.ctxs[prev_asid].tlb_gen,
next_tlb_gen);
- write_cr3(__sme_pa(next->pgd) | prev_asid);
+ write_cr3(build_cr3(next, prev_asid));
trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH,
TLB_FLUSH_ALL);
}
@@ -216,12 +215,12 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
if (need_flush) {
this_cpu_write(cpu_tlbstate.ctxs[new_asid].ctx_id, next->context.ctx_id);
this_cpu_write(cpu_tlbstate.ctxs[new_asid].tlb_gen, next_tlb_gen);
- write_cr3(__sme_pa(next->pgd) | new_asid);
+ write_cr3(build_cr3(next, new_asid));
trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH,
TLB_FLUSH_ALL);
} else {
/* The new ASID is already up to date. */
- write_cr3(__sme_pa(next->pgd) | new_asid | CR3_NOFLUSH);
+ write_cr3(build_cr3_noflush(next, new_asid));
trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, 0);
}
@@ -265,7 +264,7 @@ void initialize_tlbstate_and_flush(void)
!(cr4_read_shadow() & X86_CR4_PCIDE));
/* Force ASID 0 and force a TLB flush. */
- write_cr3(cr3 & ~CR3_PCID_MASK);
+ write_cr3(build_cr3(mm, 0));
/* Reinitialize tlbstate. */
this_cpu_write(cpu_tlbstate.loaded_mm_asid, 0);
diff --git a/tools/objtool/Documentation/stack-validation.txt b/tools/objtool/Documentation/stack-validation.txt
index 6a1af43862df..3995735a878f 100644
--- a/tools/objtool/Documentation/stack-validation.txt
+++ b/tools/objtool/Documentation/stack-validation.txt
@@ -194,10 +194,10 @@ they mean, and suggestions for how to fix them.
If it's a GCC-compiled .c file, the error may be because the function
uses an inline asm() statement which has a "call" instruction. An
asm() statement with a call instruction must declare the use of the
- stack pointer in its output operand. For example, on x86_64:
+ stack pointer in its output operand. On x86_64, this means adding
+ the ASM_CALL_CONSTRAINT as an output constraint:
- register void *__sp asm("rsp");
- asm volatile("call func" : "+r" (__sp));
+ asm volatile("call func" : ASM_CALL_CONSTRAINT);
Otherwise the stack frame may not get created before the call.
diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c
index 0e8c8ec4fd4e..0f22768c0d4d 100644
--- a/tools/objtool/arch/x86/decode.c
+++ b/tools/objtool/arch/x86/decode.c
@@ -208,14 +208,14 @@ int arch_decode_instruction(struct elf *elf, struct section *sec,
break;
case 0x89:
- if (rex == 0x48 && modrm == 0xe5) {
+ if (rex_w && !rex_r && modrm_mod == 3 && modrm_reg == 4) {
- /* mov %rsp, %rbp */
+ /* mov %rsp, reg */
*type = INSN_STACK;
op->src.type = OP_SRC_REG;
op->src.reg = CFI_SP;
op->dest.type = OP_DEST_REG;
- op->dest.reg = CFI_BP;
+ op->dest.reg = op_to_cfi_reg[modrm_rm][rex_b];
break;
}
diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index f744617c9946..a0c518ecf085 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -1203,24 +1203,39 @@ static int update_insn_state(struct instruction *insn, struct insn_state *state)
switch (op->src.type) {
case OP_SRC_REG:
- if (op->src.reg == CFI_SP && op->dest.reg == CFI_BP) {
+ if (op->src.reg == CFI_SP && op->dest.reg == CFI_BP &&
+ cfa->base == CFI_SP &&
+ regs[CFI_BP].base == CFI_CFA &&
+ regs[CFI_BP].offset == -cfa->offset) {
+
+ /* mov %rsp, %rbp */
+ cfa->base = op->dest.reg;
+ state->bp_scratch = false;
+ }
- if (cfa->base == CFI_SP &&
- regs[CFI_BP].base == CFI_CFA &&
- regs[CFI_BP].offset == -cfa->offset) {
+ else if (op->src.reg == CFI_SP &&
+ op->dest.reg == CFI_BP && state->drap) {
- /* mov %rsp, %rbp */
- cfa->base = op->dest.reg;
- state->bp_scratch = false;
- }
+ /* drap: mov %rsp, %rbp */
+ regs[CFI_BP].base = CFI_BP;
+ regs[CFI_BP].offset = -state->stack_size;
+ state->bp_scratch = false;
+ }
- else if (state->drap) {
+ else if (op->src.reg == CFI_SP && cfa->base == CFI_SP) {
- /* drap: mov %rsp, %rbp */
- regs[CFI_BP].base = CFI_BP;
- regs[CFI_BP].offset = -state->stack_size;
- state->bp_scratch = false;
- }
+ /*
+ * mov %rsp, %reg
+ *
+ * This is needed for the rare case where GCC
+ * does:
+ *
+ * mov %rsp, %rax
+ * ...
+ * mov %rax, %rsp
+ */
+ state->vals[op->dest.reg].base = CFI_CFA;
+ state->vals[op->dest.reg].offset = -state->stack_size;
}
else if (op->dest.reg == cfa->base) {
^ permalink raw reply related [flat|nested] 547+ messages in thread
* [GIT PULL] x86 fixes
@ 2017-09-13 17:54 Ingo Molnar
0 siblings, 0 replies; 547+ messages in thread
From: Ingo Molnar @ 2017-09-13 17:54 UTC (permalink / raw)
To: Linus Torvalds
Cc: linux-kernel, Thomas Gleixner, H. Peter Anvin, Peter Zijlstra,
Andy Lutomirski, Andrew Morton, Borislav Petkov
Linus,
Please pull the latest x86-urgent-for-linus git tree from:
git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus
# HEAD: 1278f58cdee63cfbb04e5624474a291c81a7a13b x86/hyper-v: Remove duplicated HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED definition
The main changes are the PCID fixes from Andy, but there's also two hyperv fixes
and two paravirt updates.
out-of-topic modifications in x86-urgent-for-linus:
-----------------------------------------------------
MAINTAINERS # 30c1bbffe629: paravirt: Switch maintainer
Thanks,
Ingo
------------------>
Andy Lutomirski (3):
x86/mm: Get rid of VM_BUG_ON in switch_tlb_irqs_off()
x86/hibernate/64: Mask off CR3's PCID bits in the saved CR3
x86/mm/64: Initialize CR4.PCIDE early
Juergen Gross (2):
x86/paravirt: Remove no longer used paravirt functions
paravirt: Switch maintainer
K. Y. Srinivasan (1):
x86/hyper-V: Allocate the IDT entry early in boot
Vitaly Kuznetsov (1):
x86/hyper-v: Remove duplicated HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED definition
MAINTAINERS | 4 +--
arch/x86/include/asm/desc.h | 3 +--
arch/x86/include/asm/paravirt.h | 37 --------------------------
arch/x86/include/asm/paravirt_types.h | 9 -------
arch/x86/include/asm/pgtable.h | 27 +++----------------
arch/x86/include/asm/special_insns.h | 10 +++----
arch/x86/include/uapi/asm/hyperv.h | 6 -----
arch/x86/kernel/cpu/common.c | 49 +++++------------------------------
arch/x86/kernel/cpu/mshyperv.c | 4 +--
arch/x86/kernel/paravirt.c | 5 ----
arch/x86/kernel/setup.c | 5 +++-
arch/x86/kernel/smpboot.c | 8 +++---
arch/x86/kvm/vmx.c | 2 +-
arch/x86/mm/init.c | 34 ++++++++++++++++++++++++
arch/x86/mm/pgtable.c | 7 +----
arch/x86/mm/tlb.c | 22 +++++++++++++++-
arch/x86/power/hibernate_64.c | 21 ++++++++++++++-
arch/x86/xen/enlighten_pv.c | 2 --
arch/x86/xen/mmu_pv.c | 2 --
19 files changed, 107 insertions(+), 150 deletions(-)
diff --git a/MAINTAINERS b/MAINTAINERS
index f46a3225e398..14e76a41b302 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -10129,7 +10129,7 @@ F: include/uapi/linux/ppdev.h
F: Documentation/parport*.txt
PARAVIRT_OPS INTERFACE
-M: Jeremy Fitzhardinge <jeremy@goop.org>
+M: Juergen Gross <jgross@suse.com>
M: Chris Wright <chrisw@sous-sol.org>
M: Alok Kataria <akataria@vmware.com>
M: Rusty Russell <rusty@rustcorp.com.au>
@@ -10137,7 +10137,7 @@ L: virtualization@lists.linux-foundation.org
S: Supported
F: Documentation/virtual/paravirt_ops.txt
F: arch/*/kernel/paravirt*
-F: arch/*/include/asm/paravirt.h
+F: arch/*/include/asm/paravirt*.h
F: include/linux/hypervisor.h
PARIDE DRIVERS FOR PARALLEL PORT IDE DEVICES
diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
index 1a2ba368da39..9d0e13738ed3 100644
--- a/arch/x86/include/asm/desc.h
+++ b/arch/x86/include/asm/desc.h
@@ -121,7 +121,6 @@ static inline int desc_empty(const void *ptr)
#define load_ldt(ldt) asm volatile("lldt %0"::"m" (ldt))
#define store_gdt(dtr) native_store_gdt(dtr)
-#define store_idt(dtr) native_store_idt(dtr)
#define store_tr(tr) (tr = native_store_tr())
#define load_TLS(t, cpu) native_load_tls(t, cpu)
@@ -228,7 +227,7 @@ static inline void native_store_gdt(struct desc_ptr *dtr)
asm volatile("sgdt %0":"=m" (*dtr));
}
-static inline void native_store_idt(struct desc_ptr *dtr)
+static inline void store_idt(struct desc_ptr *dtr)
{
asm volatile("sidt %0":"=m" (*dtr));
}
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index c25dd22f7c70..12deec722cf0 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -71,11 +71,6 @@ static inline void write_cr3(unsigned long x)
PVOP_VCALL1(pv_mmu_ops.write_cr3, x);
}
-static inline unsigned long __read_cr4(void)
-{
- return PVOP_CALL0(unsigned long, pv_cpu_ops.read_cr4);
-}
-
static inline void __write_cr4(unsigned long x)
{
PVOP_VCALL1(pv_cpu_ops.write_cr4, x);
@@ -228,10 +223,6 @@ static inline void set_ldt(const void *addr, unsigned entries)
{
PVOP_VCALL2(pv_cpu_ops.set_ldt, addr, entries);
}
-static inline void store_idt(struct desc_ptr *dtr)
-{
- PVOP_VCALL1(pv_cpu_ops.store_idt, dtr);
-}
static inline unsigned long paravirt_store_tr(void)
{
return PVOP_CALL0(unsigned long, pv_cpu_ops.store_tr);
@@ -365,12 +356,6 @@ static inline void paravirt_release_p4d(unsigned long pfn)
PVOP_VCALL1(pv_mmu_ops.release_p4d, pfn);
}
-static inline void pte_update(struct mm_struct *mm, unsigned long addr,
- pte_t *ptep)
-{
- PVOP_VCALL3(pv_mmu_ops.pte_update, mm, addr, ptep);
-}
-
static inline pte_t __pte(pteval_t val)
{
pteval_t ret;
@@ -472,28 +457,6 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
PVOP_VCALL4(pv_mmu_ops.set_pte_at, mm, addr, ptep, pte.pte);
}
-static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
- pmd_t *pmdp, pmd_t pmd)
-{
- if (sizeof(pmdval_t) > sizeof(long))
- /* 5 arg words */
- pv_mmu_ops.set_pmd_at(mm, addr, pmdp, pmd);
- else
- PVOP_VCALL4(pv_mmu_ops.set_pmd_at, mm, addr, pmdp,
- native_pmd_val(pmd));
-}
-
-static inline void set_pud_at(struct mm_struct *mm, unsigned long addr,
- pud_t *pudp, pud_t pud)
-{
- if (sizeof(pudval_t) > sizeof(long))
- /* 5 arg words */
- pv_mmu_ops.set_pud_at(mm, addr, pudp, pud);
- else
- PVOP_VCALL4(pv_mmu_ops.set_pud_at, mm, addr, pudp,
- native_pud_val(pud));
-}
-
static inline void set_pmd(pmd_t *pmdp, pmd_t pmd)
{
pmdval_t val = native_pmd_val(pmd);
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index 6b64fc6367f2..42873edd9f9d 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -107,7 +107,6 @@ struct pv_cpu_ops {
unsigned long (*read_cr0)(void);
void (*write_cr0)(unsigned long);
- unsigned long (*read_cr4)(void);
void (*write_cr4)(unsigned long);
#ifdef CONFIG_X86_64
@@ -119,8 +118,6 @@ struct pv_cpu_ops {
void (*load_tr_desc)(void);
void (*load_gdt)(const struct desc_ptr *);
void (*load_idt)(const struct desc_ptr *);
- /* store_gdt has been removed. */
- void (*store_idt)(struct desc_ptr *);
void (*set_ldt)(const void *desc, unsigned entries);
unsigned long (*store_tr)(void);
void (*load_tls)(struct thread_struct *t, unsigned int cpu);
@@ -245,12 +242,6 @@ struct pv_mmu_ops {
void (*set_pte_at)(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, pte_t pteval);
void (*set_pmd)(pmd_t *pmdp, pmd_t pmdval);
- void (*set_pmd_at)(struct mm_struct *mm, unsigned long addr,
- pmd_t *pmdp, pmd_t pmdval);
- void (*set_pud_at)(struct mm_struct *mm, unsigned long addr,
- pud_t *pudp, pud_t pudval);
- void (*pte_update)(struct mm_struct *mm, unsigned long addr,
- pte_t *ptep);
pte_t (*ptep_modify_prot_start)(struct mm_struct *mm, unsigned long addr,
pte_t *ptep);
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 5b4c44d419c5..b714934512b3 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -55,8 +55,6 @@ extern pmdval_t early_pmd_flags;
#else /* !CONFIG_PARAVIRT */
#define set_pte(ptep, pte) native_set_pte(ptep, pte)
#define set_pte_at(mm, addr, ptep, pte) native_set_pte_at(mm, addr, ptep, pte)
-#define set_pmd_at(mm, addr, pmdp, pmd) native_set_pmd_at(mm, addr, pmdp, pmd)
-#define set_pud_at(mm, addr, pudp, pud) native_set_pud_at(mm, addr, pudp, pud)
#define set_pte_atomic(ptep, pte) \
native_set_pte_atomic(ptep, pte)
@@ -87,8 +85,6 @@ extern pmdval_t early_pmd_flags;
#define pte_clear(mm, addr, ptep) native_pte_clear(mm, addr, ptep)
#define pmd_clear(pmd) native_pmd_clear(pmd)
-#define pte_update(mm, addr, ptep) do { } while (0)
-
#define pgd_val(x) native_pgd_val(x)
#define __pgd(x) native_make_pgd(x)
@@ -979,31 +975,18 @@ static inline void native_set_pte_at(struct mm_struct *mm, unsigned long addr,
native_set_pte(ptep, pte);
}
-static inline void native_set_pmd_at(struct mm_struct *mm, unsigned long addr,
- pmd_t *pmdp , pmd_t pmd)
+static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
+ pmd_t *pmdp, pmd_t pmd)
{
native_set_pmd(pmdp, pmd);
}
-static inline void native_set_pud_at(struct mm_struct *mm, unsigned long addr,
- pud_t *pudp, pud_t pud)
+static inline void set_pud_at(struct mm_struct *mm, unsigned long addr,
+ pud_t *pudp, pud_t pud)
{
native_set_pud(pudp, pud);
}
-#ifndef CONFIG_PARAVIRT
-/*
- * Rules for using pte_update - it must be called after any PTE update which
- * has not been done using the set_pte / clear_pte interfaces. It is used by
- * shadow mode hypervisors to resynchronize the shadow page tables. Kernel PTE
- * updates should either be sets, clears, or set_pte_atomic for P->P
- * transitions, which means this hook should only be called for user PTEs.
- * This hook implies a P->P protection or access change has taken place, which
- * requires a subsequent TLB flush.
- */
-#define pte_update(mm, addr, ptep) do { } while (0)
-#endif
-
/*
* We only update the dirty/accessed state if we set
* the dirty bit by hand in the kernel, since the hardware
@@ -1031,7 +1014,6 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
pte_t *ptep)
{
pte_t pte = native_ptep_get_and_clear(ptep);
- pte_update(mm, addr, ptep);
return pte;
}
@@ -1058,7 +1040,6 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm,
unsigned long addr, pte_t *ptep)
{
clear_bit(_PAGE_BIT_RW, (unsigned long *)&ptep->pte);
- pte_update(mm, addr, ptep);
}
#define flush_tlb_fix_spurious_fault(vma, address) do { } while (0)
diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h
index 9efaabf5b54b..a24dfcf79f4a 100644
--- a/arch/x86/include/asm/special_insns.h
+++ b/arch/x86/include/asm/special_insns.h
@@ -135,6 +135,11 @@ static inline void native_wbinvd(void)
extern asmlinkage void native_load_gs_index(unsigned);
+static inline unsigned long __read_cr4(void)
+{
+ return native_read_cr4();
+}
+
#ifdef CONFIG_PARAVIRT
#include <asm/paravirt.h>
#else
@@ -173,11 +178,6 @@ static inline void write_cr3(unsigned long x)
native_write_cr3(x);
}
-static inline unsigned long __read_cr4(void)
-{
- return native_read_cr4();
-}
-
static inline void __write_cr4(unsigned long x)
{
native_write_cr4(x);
diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h
index 7032f4d8dff3..f65d12504e80 100644
--- a/arch/x86/include/uapi/asm/hyperv.h
+++ b/arch/x86/include/uapi/asm/hyperv.h
@@ -153,12 +153,6 @@
#define HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED (1 << 11)
/*
- * HV_VP_SET available
- */
-#define HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED (1 << 11)
-
-
-/*
* Crash notification flag.
*/
#define HV_CRASH_CTL_CRASH_NOTIFY (1ULL << 63)
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index fb1d3358a4af..775f10100d7f 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -169,21 +169,21 @@ static int __init x86_mpx_setup(char *s)
__setup("nompx", x86_mpx_setup);
#ifdef CONFIG_X86_64
-static int __init x86_pcid_setup(char *s)
+static int __init x86_nopcid_setup(char *s)
{
- /* require an exact match without trailing characters */
- if (strlen(s))
- return 0;
+ /* nopcid doesn't accept parameters */
+ if (s)
+ return -EINVAL;
/* do not emit a message if the feature is not present */
if (!boot_cpu_has(X86_FEATURE_PCID))
- return 1;
+ return 0;
setup_clear_cpu_cap(X86_FEATURE_PCID);
pr_info("nopcid: PCID feature disabled\n");
- return 1;
+ return 0;
}
-__setup("nopcid", x86_pcid_setup);
+early_param("nopcid", x86_nopcid_setup);
#endif
static int __init x86_noinvpcid_setup(char *s)
@@ -329,38 +329,6 @@ static __always_inline void setup_smap(struct cpuinfo_x86 *c)
}
}
-static void setup_pcid(struct cpuinfo_x86 *c)
-{
- if (cpu_has(c, X86_FEATURE_PCID)) {
- if (cpu_has(c, X86_FEATURE_PGE)) {
- /*
- * We'd like to use cr4_set_bits_and_update_boot(),
- * but we can't. CR4.PCIDE is special and can only
- * be set in long mode, and the early CPU init code
- * doesn't know this and would try to restore CR4.PCIDE
- * prior to entering long mode.
- *
- * Instead, we rely on the fact that hotplug, resume,
- * etc all fully restore CR4 before they write anything
- * that could have nonzero PCID bits to CR3. CR4.PCIDE
- * has no effect on the page tables themselves, so we
- * don't need it to be restored early.
- */
- cr4_set_bits(X86_CR4_PCIDE);
- } else {
- /*
- * flush_tlb_all(), as currently implemented, won't
- * work if PCID is on but PGE is not. Since that
- * combination doesn't exist on real hardware, there's
- * no reason to try to fully support it, but it's
- * polite to avoid corrupting data if we're on
- * an improperly configured VM.
- */
- clear_cpu_cap(c, X86_FEATURE_PCID);
- }
- }
-}
-
/*
* Protection Keys are not available in 32-bit mode.
*/
@@ -1175,9 +1143,6 @@ static void identify_cpu(struct cpuinfo_x86 *c)
setup_smep(c);
setup_smap(c);
- /* Set up PCID */
- setup_pcid(c);
-
/*
* The vendor-specific functions might have changed features.
* Now we do "generic changes."
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index 3b3f713e15e5..236324e83a3a 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -59,8 +59,6 @@ void hyperv_vector_handler(struct pt_regs *regs)
void hv_setup_vmbus_irq(void (*handler)(void))
{
vmbus_handler = handler;
- /* Setup the IDT for hypervisor callback */
- alloc_intr_gate(HYPERVISOR_CALLBACK_VECTOR, hyperv_callback_vector);
}
void hv_remove_vmbus_irq(void)
@@ -251,6 +249,8 @@ static void __init ms_hyperv_init_platform(void)
*/
x86_platform.apic_post_init = hyperv_init;
hyperv_setup_mmu_ops();
+ /* Setup the IDT for hypervisor callback */
+ alloc_intr_gate(HYPERVISOR_CALLBACK_VECTOR, hyperv_callback_vector);
#endif
}
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index a14df9eecfed..19a3e8f961c7 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -327,7 +327,6 @@ __visible struct pv_cpu_ops pv_cpu_ops = {
.set_debugreg = native_set_debugreg,
.read_cr0 = native_read_cr0,
.write_cr0 = native_write_cr0,
- .read_cr4 = native_read_cr4,
.write_cr4 = native_write_cr4,
#ifdef CONFIG_X86_64
.read_cr8 = native_read_cr8,
@@ -343,7 +342,6 @@ __visible struct pv_cpu_ops pv_cpu_ops = {
.set_ldt = native_set_ldt,
.load_gdt = native_load_gdt,
.load_idt = native_load_idt,
- .store_idt = native_store_idt,
.store_tr = native_store_tr,
.load_tls = native_load_tls,
#ifdef CONFIG_X86_64
@@ -411,8 +409,6 @@ struct pv_mmu_ops pv_mmu_ops __ro_after_init = {
.set_pte = native_set_pte,
.set_pte_at = native_set_pte_at,
.set_pmd = native_set_pmd,
- .set_pmd_at = native_set_pmd_at,
- .pte_update = paravirt_nop,
.ptep_modify_prot_start = __ptep_modify_prot_start,
.ptep_modify_prot_commit = __ptep_modify_prot_commit,
@@ -424,7 +420,6 @@ struct pv_mmu_ops pv_mmu_ops __ro_after_init = {
.pmd_clear = native_pmd_clear,
#endif
.set_pud = native_set_pud,
- .set_pud_at = native_set_pud_at,
.pmd_val = PTE_IDENT,
.make_pmd = PTE_IDENT,
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index d84afb0a322d..0957dd73d127 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -1178,8 +1178,11 @@ void __init setup_arch(char **cmdline_p)
* with the current CR4 value. This may not be necessary, but
* auditing all the early-boot CR4 manipulation would be needed to
* rule it out.
+ *
+ * Mask off features that don't work outside long mode (just
+ * PCIDE for now).
*/
- mmu_cr4_features = __read_cr4();
+ mmu_cr4_features = __read_cr4() & ~X86_CR4_PCIDE;
memblock_set_current_limit(get_max_mapped());
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index cd6622c3204e..0854ff169274 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -226,10 +226,12 @@ static int enable_start_cpu0;
static void notrace start_secondary(void *unused)
{
/*
- * Don't put *anything* before cpu_init(), SMP booting is too
- * fragile that we want to limit the things done here to the
- * most necessary things.
+ * Don't put *anything* except direct CPU state initialization
+ * before cpu_init(), SMP booting is too fragile that we want to
+ * limit the things done here to the most necessary things.
*/
+ if (boot_cpu_has(X86_FEATURE_PCID))
+ __write_cr4(__read_cr4() | X86_CR4_PCIDE);
cpu_init();
x86_cpuinit.early_percpu_clock_init();
preempt_disable();
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 4253adef9044..699704d4bc9e 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -5192,7 +5192,7 @@ static void vmx_set_constant_host_state(struct vcpu_vmx *vmx)
vmcs_write16(HOST_SS_SELECTOR, __KERNEL_DS); /* 22.2.4 */
vmcs_write16(HOST_TR_SELECTOR, GDT_ENTRY_TSS*8); /* 22.2.4 */
- native_store_idt(&dt);
+ store_idt(&dt);
vmcs_writel(HOST_IDTR_BASE, dt.address); /* 22.2.4 */
vmx->host_idt_base = dt.address;
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 7777ccc0e9f9..af5c1ed21d43 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -19,6 +19,7 @@
#include <asm/microcode.h>
#include <asm/kaslr.h>
#include <asm/hypervisor.h>
+#include <asm/cpufeature.h>
/*
* We need to define the tracepoints somewhere, and tlb.c
@@ -193,6 +194,38 @@ static void __init probe_page_size_mask(void)
}
}
+static void setup_pcid(void)
+{
+#ifdef CONFIG_X86_64
+ if (boot_cpu_has(X86_FEATURE_PCID)) {
+ if (boot_cpu_has(X86_FEATURE_PGE)) {
+ /*
+ * This can't be cr4_set_bits_and_update_boot() --
+ * the trampoline code can't handle CR4.PCIDE and
+ * it wouldn't do any good anyway. Despite the name,
+ * cr4_set_bits_and_update_boot() doesn't actually
+ * cause the bits in question to remain set all the
+ * way through the secondary boot asm.
+ *
+ * Instead, we brute-force it and set CR4.PCIDE
+ * manually in start_secondary().
+ */
+ cr4_set_bits(X86_CR4_PCIDE);
+ } else {
+ /*
+ * flush_tlb_all(), as currently implemented, won't
+ * work if PCID is on but PGE is not. Since that
+ * combination doesn't exist on real hardware, there's
+ * no reason to try to fully support it, but it's
+ * polite to avoid corrupting data if we're on
+ * an improperly configured VM.
+ */
+ setup_clear_cpu_cap(X86_FEATURE_PCID);
+ }
+ }
+#endif
+}
+
#ifdef CONFIG_X86_32
#define NR_RANGE_MR 3
#else /* CONFIG_X86_64 */
@@ -592,6 +625,7 @@ void __init init_mem_mapping(void)
unsigned long end;
probe_page_size_mask();
+ setup_pcid();
#ifdef CONFIG_X86_64
end = max_pfn << PAGE_SHIFT;
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 218834a3e9ad..b372f3442bbf 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -426,10 +426,8 @@ int ptep_set_access_flags(struct vm_area_struct *vma,
{
int changed = !pte_same(*ptep, entry);
- if (changed && dirty) {
+ if (changed && dirty)
*ptep = entry;
- pte_update(vma->vm_mm, address, ptep);
- }
return changed;
}
@@ -486,9 +484,6 @@ int ptep_test_and_clear_young(struct vm_area_struct *vma,
ret = test_and_clear_bit(_PAGE_BIT_ACCESSED,
(unsigned long *) &ptep->pte);
- if (ret)
- pte_update(vma->vm_mm, addr, ptep);
-
return ret;
}
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 37689a7cc03b..1ab3821f9e26 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -121,8 +121,28 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
* hypothetical buggy code that directly switches to swapper_pg_dir
* without going through leave_mm() / switch_mm_irqs_off() or that
* does something like write_cr3(read_cr3_pa()).
+ *
+ * Only do this check if CONFIG_DEBUG_VM=y because __read_cr3()
+ * isn't free.
*/
- VM_BUG_ON(__read_cr3() != (__sme_pa(real_prev->pgd) | prev_asid));
+#ifdef CONFIG_DEBUG_VM
+ if (WARN_ON_ONCE(__read_cr3() !=
+ (__sme_pa(real_prev->pgd) | prev_asid))) {
+ /*
+ * If we were to BUG here, we'd be very likely to kill
+ * the system so hard that we don't see the call trace.
+ * Try to recover instead by ignoring the error and doing
+ * a global flush to minimize the chance of corruption.
+ *
+ * (This is far from being a fully correct recovery.
+ * Architecturally, the CPU could prefetch something
+ * back into an incorrect ASID slot and leave it there
+ * to cause trouble down the road. It's better than
+ * nothing, though.)
+ */
+ __flush_tlb_all();
+ }
+#endif
if (real_prev == next) {
VM_BUG_ON(this_cpu_read(cpu_tlbstate.ctxs[prev_asid].ctx_id) !=
diff --git a/arch/x86/power/hibernate_64.c b/arch/x86/power/hibernate_64.c
index f2598d81cd55..f910c514438f 100644
--- a/arch/x86/power/hibernate_64.c
+++ b/arch/x86/power/hibernate_64.c
@@ -295,7 +295,26 @@ int arch_hibernation_header_save(void *addr, unsigned int max_size)
return -EOVERFLOW;
rdr->jump_address = (unsigned long)restore_registers;
rdr->jump_address_phys = __pa_symbol(restore_registers);
- rdr->cr3 = restore_cr3;
+
+ /*
+ * The restore code fixes up CR3 and CR4 in the following sequence:
+ *
+ * [in hibernation asm]
+ * 1. CR3 <= temporary page tables
+ * 2. CR4 <= mmu_cr4_features (from the kernel that restores us)
+ * 3. CR3 <= rdr->cr3
+ * 4. CR4 <= mmu_cr4_features (from us, i.e. the image kernel)
+ * [in restore_processor_state()]
+ * 5. CR4 <= saved CR4
+ * 6. CR3 <= saved CR3
+ *
+ * Our mmu_cr4_features has CR4.PCIDE=0, and toggling
+ * CR4.PCIDE while CR3's PCID bits are nonzero is illegal, so
+ * rdr->cr3 needs to point to valid page tables but must not
+ * have any of the PCID bits set.
+ */
+ rdr->cr3 = restore_cr3 & ~CR3_PCID_MASK;
+
rdr->magic = RESTORE_MAGIC;
hibernation_e820_save(rdr->e820_digest);
diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index ae2a2e2d6362..69b9deff7e5c 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -1038,7 +1038,6 @@ static const struct pv_cpu_ops xen_cpu_ops __initconst = {
.read_cr0 = xen_read_cr0,
.write_cr0 = xen_write_cr0,
- .read_cr4 = native_read_cr4,
.write_cr4 = xen_write_cr4,
#ifdef CONFIG_X86_64
@@ -1073,7 +1072,6 @@ static const struct pv_cpu_ops xen_cpu_ops __initconst = {
.alloc_ldt = xen_alloc_ldt,
.free_ldt = xen_free_ldt,
- .store_idt = native_store_idt,
.store_tr = xen_store_tr,
.write_ldt_entry = xen_write_ldt_entry,
diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c
index 6b983b300666..509f560bd0c6 100644
--- a/arch/x86/xen/mmu_pv.c
+++ b/arch/x86/xen/mmu_pv.c
@@ -2409,8 +2409,6 @@ static const struct pv_mmu_ops xen_mmu_ops __initconst = {
.flush_tlb_single = xen_flush_tlb_single,
.flush_tlb_others = xen_flush_tlb_others,
- .pte_update = paravirt_nop,
-
.pgd_alloc = xen_pgd_alloc,
.pgd_free = xen_pgd_free,
^ permalink raw reply related [flat|nested] 547+ messages in thread
* [GIT PULL] x86 fixes
@ 2017-09-12 15:38 Ingo Molnar
0 siblings, 0 replies; 547+ messages in thread
From: Ingo Molnar @ 2017-09-12 15:38 UTC (permalink / raw)
To: Linus Torvalds
Cc: linux-kernel, Thomas Gleixner, H. Peter Anvin, Peter Zijlstra,
Andrew Morton
Linus,
Please pull the latest x86-urgent-for-linus git tree from:
git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus
# HEAD: e2329b4252f373c244e75928be38bf1dd45b35da x86/cpu: Remove unused and undefined __generic_processor_info() declaration
Two fixes: dead code removal, plus a SME memory encryption fix on 32-bit kernels
that crashed Xen guests.
out-of-topic modifications in x86-urgent-for-linus:
-----------------------------------------------------
include/linux/mem_encrypt.h # 21d9bb4a05ba: x86/mm: Make the SME mask a
Thanks,
Ingo
------------------>
Borislav Petkov (1):
x86/mm: Make the SME mask a u64
Dou Liyang (1):
x86/cpu: Remove unused and undefined __generic_processor_info() declaration
arch/x86/include/asm/mem_encrypt.h | 4 ++--
arch/x86/include/asm/mpspec.h | 1 -
arch/x86/kernel/apic/apic.c | 2 +-
arch/x86/mm/mem_encrypt.c | 2 +-
include/linux/mem_encrypt.h | 13 +++++++++----
5 files changed, 13 insertions(+), 9 deletions(-)
diff --git a/arch/x86/include/asm/mem_encrypt.h b/arch/x86/include/asm/mem_encrypt.h
index 8e618fcf1f7c..6a77c63540f7 100644
--- a/arch/x86/include/asm/mem_encrypt.h
+++ b/arch/x86/include/asm/mem_encrypt.h
@@ -21,7 +21,7 @@
#ifdef CONFIG_AMD_MEM_ENCRYPT
-extern unsigned long sme_me_mask;
+extern u64 sme_me_mask;
void sme_encrypt_execute(unsigned long encrypted_kernel_vaddr,
unsigned long decrypted_kernel_vaddr,
@@ -49,7 +49,7 @@ void swiotlb_set_mem_attributes(void *vaddr, unsigned long size);
#else /* !CONFIG_AMD_MEM_ENCRYPT */
-#define sme_me_mask 0UL
+#define sme_me_mask 0ULL
static inline void __init sme_early_encrypt(resource_size_t paddr,
unsigned long size) { }
diff --git a/arch/x86/include/asm/mpspec.h b/arch/x86/include/asm/mpspec.h
index 831eb7895535..c471ca1f9412 100644
--- a/arch/x86/include/asm/mpspec.h
+++ b/arch/x86/include/asm/mpspec.h
@@ -86,7 +86,6 @@ static inline void e820__memblock_alloc_reserved_mpc_new(void) { }
#endif
int generic_processor_info(int apicid, int version);
-int __generic_processor_info(int apicid, int version, bool enabled);
#define PHYSID_ARRAY_SIZE BITS_TO_LONGS(MAX_LOCAL_APIC)
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 7834f73efbf1..6e19ef152869 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -2130,7 +2130,7 @@ int generic_processor_info(int apicid, int version)
* Since fixing handling of boot_cpu_physical_apicid requires
* another discussion and tests on each platform, we leave it
* for now and here we use read_apic_id() directly in this
- * function, __generic_processor_info().
+ * function, generic_processor_info().
*/
if (disabled_cpu_apicid != BAD_APICID &&
disabled_cpu_apicid != read_apic_id() &&
diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c
index 0fbd09269757..3fcc8e01683b 100644
--- a/arch/x86/mm/mem_encrypt.c
+++ b/arch/x86/mm/mem_encrypt.c
@@ -37,7 +37,7 @@ static char sme_cmdline_off[] __initdata = "off";
* reside in the .data section so as not to be zeroed out when the .bss
* section is later cleared.
*/
-unsigned long sme_me_mask __section(.data) = 0;
+u64 sme_me_mask __section(.data) = 0;
EXPORT_SYMBOL_GPL(sme_me_mask);
/* Buffer used for early in-place encryption by BSP, no locking needed */
diff --git a/include/linux/mem_encrypt.h b/include/linux/mem_encrypt.h
index 1255f09f5e42..265a9cd21cb4 100644
--- a/include/linux/mem_encrypt.h
+++ b/include/linux/mem_encrypt.h
@@ -21,7 +21,7 @@
#else /* !CONFIG_ARCH_HAS_MEM_ENCRYPT */
-#define sme_me_mask 0UL
+#define sme_me_mask 0ULL
#endif /* CONFIG_ARCH_HAS_MEM_ENCRYPT */
@@ -30,18 +30,23 @@ static inline bool sme_active(void)
return !!sme_me_mask;
}
-static inline unsigned long sme_get_me_mask(void)
+static inline u64 sme_get_me_mask(void)
{
return sme_me_mask;
}
+#ifdef CONFIG_AMD_MEM_ENCRYPT
/*
* The __sme_set() and __sme_clr() macros are useful for adding or removing
* the encryption mask from a value (e.g. when dealing with pagetable
* entries).
*/
-#define __sme_set(x) ((unsigned long)(x) | sme_me_mask)
-#define __sme_clr(x) ((unsigned long)(x) & ~sme_me_mask)
+#define __sme_set(x) ((x) | sme_me_mask)
+#define __sme_clr(x) ((x) & ~sme_me_mask)
+#else
+#define __sme_set(x) (x)
+#define __sme_clr(x) (x)
+#endif
#endif /* __ASSEMBLY__ */
^ permalink raw reply related [flat|nested] 547+ messages in thread
* [GIT PULL] x86 fixes
@ 2017-08-26 7:26 Ingo Molnar
0 siblings, 0 replies; 547+ messages in thread
From: Ingo Molnar @ 2017-08-26 7:26 UTC (permalink / raw)
To: Linus Torvalds
Cc: linux-kernel, Thomas Gleixner, H. Peter Anvin, Peter Zijlstra,
Andrew Morton
Linus,
Please pull the latest x86-urgent-for-linus git tree from:
git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus
# HEAD: ccd5b3235180eef3cfec337df1c8554ab151b5cc x86/mm: Fix use-after-free of ldt_struct
Two fixes: one for an ldt_struct handling bug and a cherry-picked objtool fix.
Thanks,
Ingo
------------------>
Eric Biggers (1):
x86/mm: Fix use-after-free of ldt_struct
Josh Poimboeuf (1):
objtool: Fix '-mtune=atom' decoding support in objtool 2.0
arch/x86/include/asm/mmu_context.h | 4 +---
tools/objtool/arch/x86/decode.c | 26 +++++++++++++++++++++++++-
2 files changed, 26 insertions(+), 4 deletions(-)
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index 265c907d7d4c..7a234be7e298 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -140,9 +140,7 @@ static inline int init_new_context(struct task_struct *tsk,
mm->context.execute_only_pkey = -1;
}
#endif
- init_new_context_ldt(tsk, mm);
-
- return 0;
+ return init_new_context_ldt(tsk, mm);
}
static inline void destroy_context(struct mm_struct *mm)
{
diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c
index a36c2eba64e7..4559a21a8de2 100644
--- a/tools/objtool/arch/x86/decode.c
+++ b/tools/objtool/arch/x86/decode.c
@@ -271,7 +271,7 @@ int arch_decode_instruction(struct elf *elf, struct section *sec,
case 0x8d:
if (rex == 0x48 && modrm == 0x65) {
- /* lea -disp(%rbp), %rsp */
+ /* lea disp(%rbp), %rsp */
*type = INSN_STACK;
op->src.type = OP_SRC_ADD;
op->src.reg = CFI_BP;
@@ -281,6 +281,30 @@ int arch_decode_instruction(struct elf *elf, struct section *sec,
break;
}
+ if (rex == 0x48 && (modrm == 0xa4 || modrm == 0x64) &&
+ sib == 0x24) {
+
+ /* lea disp(%rsp), %rsp */
+ *type = INSN_STACK;
+ op->src.type = OP_SRC_ADD;
+ op->src.reg = CFI_SP;
+ op->src.offset = insn.displacement.value;
+ op->dest.type = OP_DEST_REG;
+ op->dest.reg = CFI_SP;
+ break;
+ }
+
+ if (rex == 0x48 && modrm == 0x2c && sib == 0x24) {
+
+ /* lea (%rsp), %rbp */
+ *type = INSN_STACK;
+ op->src.type = OP_SRC_REG;
+ op->src.reg = CFI_SP;
+ op->dest.type = OP_DEST_REG;
+ op->dest.reg = CFI_BP;
+ break;
+ }
+
if (rex == 0x4c && modrm == 0x54 && sib == 0x24 &&
insn.displacement.value == 8) {
^ permalink raw reply related [flat|nested] 547+ messages in thread
* [GIT PULL] x86 fixes
@ 2017-07-21 10:26 Ingo Molnar
0 siblings, 0 replies; 547+ messages in thread
From: Ingo Molnar @ 2017-07-21 10:26 UTC (permalink / raw)
To: Linus Torvalds
Cc: linux-kernel, Thomas Gleixner, H. Peter Anvin, Peter Zijlstra,
Andrew Morton
Linus,
Please pull the latest x86-urgent-for-linus git tree from:
git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus
# HEAD: db15e7f27369b81b6605a546d54eb844f87370a5 x86/devicetree: Convert to using %pOF instead of ->full_name
Half of the fixes are for various build time warnings triggered by randconfig
builds. Most (but not all...) were harmless. There's also:
- ACPI boundary condition fixes,
- UV platform fixes,
- defconfig updates,
- an AMD K6 CPU init fix,
- a %pOF printk format related preparatory change,
- and warning fix related to the tlb/PCID changes.
Thanks,
Ingo
------------------>
Andrew Banman (1):
x86/platform/uv/BAU: Disable BAU on single hub configurations
Arnd Bergmann (7):
perf/x86: Shut up false-positive -Wmaybe-uninitialized warning
x86/fpu/math-emu: Fix possible uninitialized variable use
x86/fpu/math-emu: Avoid bogus -Wint-in-bool-context warning
x86/io: Add "memory" clobber to insb/insw/insl/outsb/outsw/outsl
x86/build: Silence the build with "make -s"
x86/platform: Add PCI dependency for PUNIT_ATOM_DEBUG
x86/platform/intel-mid: Fix a format string overflow warning
Justin Ernst (1):
x86/platform/uv/BAU: Fix congested_response_us not taking effect
Krzysztof Kozlowski (1):
x86/defconfig: Remove stale, old Kconfig options
Mikulas Patocka (1):
x86/cpu: Use indirect call to measure performance in init_amd_k6()
Rob Herring (1):
x86/devicetree: Convert to using %pOF instead of ->full_name
Roman Kagan (1):
x86/mm, KVM: Fix warning when !CONFIG_PREEMPT_COUNT
Seunghun Han (2):
x86/acpi: Prevent out of bound access caused by broken ACPI tables
x86/ioapic: Pass the correct data to unmask_ioapic_irq()
arch/x86/Kconfig.debug | 1 +
arch/x86/boot/Makefile | 5 ++--
arch/x86/configs/i386_defconfig | 3 ---
arch/x86/configs/x86_64_defconfig | 3 ---
arch/x86/events/core.c | 4 +--
arch/x86/include/asm/io.h | 4 +--
arch/x86/include/asm/mmu_context.h | 2 +-
arch/x86/kernel/acpi/boot.c | 8 ++++++
arch/x86/kernel/apic/io_apic.c | 2 +-
arch/x86/kernel/cpu/amd.c | 1 +
arch/x86/kernel/devicetree.c | 3 +--
arch/x86/math-emu/Makefile | 4 +--
arch/x86/math-emu/fpu_emu.h | 2 +-
arch/x86/math-emu/reg_compare.c | 16 ++++++------
.../intel-mid/device_libs/platform_max7315.c | 6 +++--
arch/x86/platform/uv/tlb_uv.c | 29 +++++++++++++++-------
16 files changed, 55 insertions(+), 38 deletions(-)
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index fcb7604172ce..cd20ca0b4043 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -348,6 +348,7 @@ config X86_DEBUG_FPU
config PUNIT_ATOM_DEBUG
tristate "ATOM Punit debug driver"
+ depends on PCI
select DEBUG_FS
select IOSF_MBI
---help---
diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile
index 0d810fb15eac..d88a2fddba8c 100644
--- a/arch/x86/boot/Makefile
+++ b/arch/x86/boot/Makefile
@@ -73,12 +73,13 @@ UBSAN_SANITIZE := n
$(obj)/bzImage: asflags-y := $(SVGA_MODE)
quiet_cmd_image = BUILD $@
+silent_redirect_image = >/dev/null
cmd_image = $(obj)/tools/build $(obj)/setup.bin $(obj)/vmlinux.bin \
- $(obj)/zoffset.h $@
+ $(obj)/zoffset.h $@ $($(quiet)redirect_image)
$(obj)/bzImage: $(obj)/setup.bin $(obj)/vmlinux.bin $(obj)/tools/build FORCE
$(call if_changed,image)
- @echo 'Kernel: $@ is ready' ' (#'`cat .version`')'
+ @$(kecho) 'Kernel: $@ is ready' ' (#'`cat .version`')'
OBJCOPYFLAGS_vmlinux.bin := -O binary -R .note -R .comment -S
$(obj)/vmlinux.bin: $(obj)/compressed/vmlinux FORCE
diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig
index 6cf79e1a6830..0eb9f92f3717 100644
--- a/arch/x86/configs/i386_defconfig
+++ b/arch/x86/configs/i386_defconfig
@@ -1,5 +1,4 @@
# CONFIG_64BIT is not set
-CONFIG_EXPERIMENTAL=y
# CONFIG_LOCALVERSION_AUTO is not set
CONFIG_SYSVIPC=y
CONFIG_POSIX_MQUEUE=y
@@ -125,7 +124,6 @@ CONFIG_NF_CONNTRACK_IPV4=y
CONFIG_IP_NF_IPTABLES=y
CONFIG_IP_NF_FILTER=y
CONFIG_IP_NF_TARGET_REJECT=y
-CONFIG_IP_NF_TARGET_ULOG=y
CONFIG_NF_NAT=y
CONFIG_IP_NF_TARGET_MASQUERADE=y
CONFIG_IP_NF_MANGLE=y
@@ -255,7 +253,6 @@ CONFIG_USB_OHCI_HCD=y
CONFIG_USB_UHCI_HCD=y
CONFIG_USB_PRINTER=y
CONFIG_USB_STORAGE=y
-CONFIG_USB_LIBUSUAL=y
CONFIG_EDAC=y
CONFIG_RTC_CLASS=y
# CONFIG_RTC_HCTOSYS is not set
diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig
index de45f57b410d..4a4b16e56d35 100644
--- a/arch/x86/configs/x86_64_defconfig
+++ b/arch/x86/configs/x86_64_defconfig
@@ -1,4 +1,3 @@
-CONFIG_EXPERIMENTAL=y
# CONFIG_LOCALVERSION_AUTO is not set
CONFIG_SYSVIPC=y
CONFIG_POSIX_MQUEUE=y
@@ -124,7 +123,6 @@ CONFIG_NF_CONNTRACK_IPV4=y
CONFIG_IP_NF_IPTABLES=y
CONFIG_IP_NF_FILTER=y
CONFIG_IP_NF_TARGET_REJECT=y
-CONFIG_IP_NF_TARGET_ULOG=y
CONFIG_NF_NAT=y
CONFIG_IP_NF_TARGET_MASQUERADE=y
CONFIG_IP_NF_MANGLE=y
@@ -251,7 +249,6 @@ CONFIG_USB_OHCI_HCD=y
CONFIG_USB_UHCI_HCD=y
CONFIG_USB_PRINTER=y
CONFIG_USB_STORAGE=y
-CONFIG_USB_LIBUSUAL=y
CONFIG_EDAC=y
CONFIG_RTC_CLASS=y
# CONFIG_RTC_HCTOSYS is not set
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index ff1ea2fb9705..8e3db8f642a7 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -191,8 +191,8 @@ static void release_pmc_hardware(void) {}
static bool check_hw_exists(void)
{
- u64 val, val_fail, val_new= ~0;
- int i, reg, reg_fail, ret = 0;
+ u64 val, val_fail = -1, val_new= ~0;
+ int i, reg, reg_fail = -1, ret = 0;
int bios_fail = 0;
int reg_safe = -1;
diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h
index 7afb0e2f07f4..48febf07e828 100644
--- a/arch/x86/include/asm/io.h
+++ b/arch/x86/include/asm/io.h
@@ -328,13 +328,13 @@ static inline unsigned type in##bwl##_p(int port) \
static inline void outs##bwl(int port, const void *addr, unsigned long count) \
{ \
asm volatile("rep; outs" #bwl \
- : "+S"(addr), "+c"(count) : "d"(port)); \
+ : "+S"(addr), "+c"(count) : "d"(port) : "memory"); \
} \
\
static inline void ins##bwl(int port, void *addr, unsigned long count) \
{ \
asm volatile("rep; ins" #bwl \
- : "+D"(addr), "+c"(count) : "d"(port)); \
+ : "+D"(addr), "+c"(count) : "d"(port) : "memory"); \
}
BUILDIO(b, b, char)
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index ecfcb6643c9b..265c907d7d4c 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -293,7 +293,7 @@ static inline unsigned long __get_current_cr3_fast(void)
unsigned long cr3 = __pa(this_cpu_read(cpu_tlbstate.loaded_mm)->pgd);
/* For now, be very restrictive about when this can be called. */
- VM_WARN_ON(in_nmi() || !in_atomic());
+ VM_WARN_ON(in_nmi() || preemptible());
VM_BUG_ON(cr3 != __read_cr3());
return cr3;
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 6bb680671088..7491e73d9253 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -347,6 +347,14 @@ static void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger,
struct mpc_intsrc mp_irq;
/*
+ * Check bus_irq boundary.
+ */
+ if (bus_irq >= NR_IRQS_LEGACY) {
+ pr_warn("Invalid bus_irq %u for legacy override\n", bus_irq);
+ return;
+ }
+
+ /*
* Convert 'gsi' to 'ioapic.pin'.
*/
ioapic = mp_find_ioapic(gsi);
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index b4f5f73febdb..237e9c2341c7 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -2093,7 +2093,7 @@ static inline void __init check_timer(void)
int idx;
idx = find_irq_entry(apic1, pin1, mp_INT);
if (idx != -1 && irq_trigger(idx))
- unmask_ioapic_irq(irq_get_chip_data(0));
+ unmask_ioapic_irq(irq_get_irq_data(0));
}
irq_domain_deactivate_irq(irq_data);
irq_domain_activate_irq(irq_data);
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index bb5abe8f5fd4..3b9e220621f8 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -134,6 +134,7 @@ static void init_amd_k6(struct cpuinfo_x86 *c)
n = K6_BUG_LOOP;
f_vide = vide;
+ OPTIMIZER_HIDE_VAR(f_vide);
d = rdtsc();
while (n--)
f_vide();
diff --git a/arch/x86/kernel/devicetree.c b/arch/x86/kernel/devicetree.c
index 3fe45f84ced4..cbf1f6ba39a8 100644
--- a/arch/x86/kernel/devicetree.c
+++ b/arch/x86/kernel/devicetree.c
@@ -235,8 +235,7 @@ static void __init dtb_add_ioapic(struct device_node *dn)
ret = of_address_to_resource(dn, 0, &r);
if (ret) {
- printk(KERN_ERR "Can't obtain address from node %s.\n",
- dn->full_name);
+ printk(KERN_ERR "Can't obtain address from device node %pOF.\n", dn);
return;
}
mp_register_ioapic(++ioapic_id, r.start, gsi_top, &cfg);
diff --git a/arch/x86/math-emu/Makefile b/arch/x86/math-emu/Makefile
index 9b0c63b60302..1b2dac174321 100644
--- a/arch/x86/math-emu/Makefile
+++ b/arch/x86/math-emu/Makefile
@@ -5,8 +5,8 @@
#DEBUG = -DDEBUGGING
DEBUG =
PARANOID = -DPARANOID
-EXTRA_CFLAGS := $(PARANOID) $(DEBUG) -fno-builtin $(MATH_EMULATION)
-EXTRA_AFLAGS := $(PARANOID)
+ccflags-y += $(PARANOID) $(DEBUG) -fno-builtin $(MATH_EMULATION)
+asflags-y += $(PARANOID)
# From 'C' language sources:
C_OBJS =fpu_entry.o errors.o \
diff --git a/arch/x86/math-emu/fpu_emu.h b/arch/x86/math-emu/fpu_emu.h
index afbc4d805d66..c9c320dccca1 100644
--- a/arch/x86/math-emu/fpu_emu.h
+++ b/arch/x86/math-emu/fpu_emu.h
@@ -157,7 +157,7 @@ extern u_char const data_sizes_16[32];
#define signbyte(a) (((u_char *)(a))[9])
#define getsign(a) (signbyte(a) & 0x80)
-#define setsign(a,b) { if (b) signbyte(a) |= 0x80; else signbyte(a) &= 0x7f; }
+#define setsign(a,b) { if ((b) != 0) signbyte(a) |= 0x80; else signbyte(a) &= 0x7f; }
#define copysign(a,b) { if (getsign(a)) signbyte(b) |= 0x80; \
else signbyte(b) &= 0x7f; }
#define changesign(a) { signbyte(a) ^= 0x80; }
diff --git a/arch/x86/math-emu/reg_compare.c b/arch/x86/math-emu/reg_compare.c
index b77360fdbf4a..19b33b50adfa 100644
--- a/arch/x86/math-emu/reg_compare.c
+++ b/arch/x86/math-emu/reg_compare.c
@@ -168,7 +168,7 @@ static int compare(FPU_REG const *b, int tagb)
/* This function requires that st(0) is not empty */
int FPU_compare_st_data(FPU_REG const *loaded_data, u_char loaded_tag)
{
- int f = 0, c;
+ int f, c;
c = compare(loaded_data, loaded_tag);
@@ -189,12 +189,12 @@ int FPU_compare_st_data(FPU_REG const *loaded_data, u_char loaded_tag)
case COMP_No_Comp:
f = SW_C3 | SW_C2 | SW_C0;
break;
-#ifdef PARANOID
default:
+#ifdef PARANOID
EXCEPTION(EX_INTERNAL | 0x121);
+#endif /* PARANOID */
f = SW_C3 | SW_C2 | SW_C0;
break;
-#endif /* PARANOID */
}
setcc(f);
if (c & COMP_Denormal) {
@@ -205,7 +205,7 @@ int FPU_compare_st_data(FPU_REG const *loaded_data, u_char loaded_tag)
static int compare_st_st(int nr)
{
- int f = 0, c;
+ int f, c;
FPU_REG *st_ptr;
if (!NOT_EMPTY(0) || !NOT_EMPTY(nr)) {
@@ -235,12 +235,12 @@ static int compare_st_st(int nr)
case COMP_No_Comp:
f = SW_C3 | SW_C2 | SW_C0;
break;
-#ifdef PARANOID
default:
+#ifdef PARANOID
EXCEPTION(EX_INTERNAL | 0x122);
+#endif /* PARANOID */
f = SW_C3 | SW_C2 | SW_C0;
break;
-#endif /* PARANOID */
}
setcc(f);
if (c & COMP_Denormal) {
@@ -283,12 +283,12 @@ static int compare_i_st_st(int nr)
case COMP_No_Comp:
f = X86_EFLAGS_ZF | X86_EFLAGS_PF | X86_EFLAGS_CF;
break;
-#ifdef PARANOID
default:
+#ifdef PARANOID
EXCEPTION(EX_INTERNAL | 0x122);
+#endif /* PARANOID */
f = 0;
break;
-#endif /* PARANOID */
}
FPU_EFLAGS = (FPU_EFLAGS & ~(X86_EFLAGS_ZF | X86_EFLAGS_PF | X86_EFLAGS_CF)) | f;
if (c & COMP_Denormal) {
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_max7315.c b/arch/x86/platform/intel-mid/device_libs/platform_max7315.c
index 6e075afa7877..58337b2bc682 100644
--- a/arch/x86/platform/intel-mid/device_libs/platform_max7315.c
+++ b/arch/x86/platform/intel-mid/device_libs/platform_max7315.c
@@ -38,8 +38,10 @@ static void __init *max7315_platform_data(void *info)
*/
strcpy(i2c_info->type, "max7315");
if (nr++) {
- sprintf(base_pin_name, "max7315_%d_base", nr);
- sprintf(intr_pin_name, "max7315_%d_int", nr);
+ snprintf(base_pin_name, sizeof(base_pin_name),
+ "max7315_%d_base", nr);
+ snprintf(intr_pin_name, sizeof(intr_pin_name),
+ "max7315_%d_int", nr);
} else {
strcpy(base_pin_name, "max7315_base");
strcpy(intr_pin_name, "max7315_int");
diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c
index d4a61ddf9e62..3e4bdb442fbc 100644
--- a/arch/x86/platform/uv/tlb_uv.c
+++ b/arch/x86/platform/uv/tlb_uv.c
@@ -40,7 +40,6 @@ static int timeout_base_ns[] = {
static int timeout_us;
static bool nobau = true;
static int nobau_perm;
-static cycles_t congested_cycles;
/* tunables: */
static int max_concurr = MAX_BAU_CONCURRENT;
@@ -829,10 +828,10 @@ static void record_send_stats(cycles_t time1, cycles_t time2,
if ((completion_status == FLUSH_COMPLETE) && (try == 1)) {
bcp->period_requests++;
bcp->period_time += elapsed;
- if ((elapsed > congested_cycles) &&
+ if ((elapsed > usec_2_cycles(bcp->cong_response_us)) &&
(bcp->period_requests > bcp->cong_reps) &&
((bcp->period_time / bcp->period_requests) >
- congested_cycles)) {
+ usec_2_cycles(bcp->cong_response_us))) {
stat->s_congested++;
disable_for_period(bcp, stat);
}
@@ -2222,14 +2221,17 @@ static int __init uv_bau_init(void)
else if (is_uv1_hub())
ops = uv1_bau_ops;
+ nuvhubs = uv_num_possible_blades();
+ if (nuvhubs < 2) {
+ pr_crit("UV: BAU disabled - insufficient hub count\n");
+ goto err_bau_disable;
+ }
+
for_each_possible_cpu(cur_cpu) {
mask = &per_cpu(uv_flush_tlb_mask, cur_cpu);
zalloc_cpumask_var_node(mask, GFP_KERNEL, cpu_to_node(cur_cpu));
}
- nuvhubs = uv_num_possible_blades();
- congested_cycles = usec_2_cycles(congested_respns_us);
-
uv_base_pnode = 0x7fffffff;
for (uvhub = 0; uvhub < nuvhubs; uvhub++) {
cpus = uv_blade_nr_possible_cpus(uvhub);
@@ -2242,9 +2244,8 @@ static int __init uv_bau_init(void)
enable_timeouts();
if (init_per_cpu(nuvhubs, uv_base_pnode)) {
- set_bau_off();
- nobau_perm = 1;
- return 0;
+ pr_crit("UV: BAU disabled - per CPU init failed\n");
+ goto err_bau_disable;
}
vector = UV_BAU_MESSAGE;
@@ -2270,6 +2271,16 @@ static int __init uv_bau_init(void)
}
return 0;
+
+err_bau_disable:
+
+ for_each_possible_cpu(cur_cpu)
+ free_cpumask_var(per_cpu(uv_flush_tlb_mask, cur_cpu));
+
+ set_bau_off();
+ nobau_perm = 1;
+
+ return -EINVAL;
}
core_initcall(uv_bau_init);
fs_initcall(uv_ptc_init);
^ permalink raw reply related [flat|nested] 547+ messages in thread
* [GIT PULL] x86 fixes
@ 2017-06-10 9:03 Ingo Molnar
0 siblings, 0 replies; 547+ messages in thread
From: Ingo Molnar @ 2017-06-10 9:03 UTC (permalink / raw)
To: Linus Torvalds
Cc: linux-kernel, Thomas Gleixner, H. Peter Anvin, Peter Zijlstra,
Andrew Morton
Linus,
Please pull the latest x86-urgent-for-linus git tree from:
git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus
# HEAD: 5b0bc9ac2ce4881ee318a21f31140584ce4dbdad x86/microcode/intel: Clear patch pointer before jettisoning the initrd
Misc fixes: a Geode fix plus a microcode loader fix.
Thanks,
Ingo
------------------>
Christian Sünkenberg (1):
x86/cpu/cyrix: Add alternative Device ID of Geode GX1 SoC
Dominik Brodowski (1):
x86/microcode/intel: Clear patch pointer before jettisoning the initrd
arch/x86/kernel/cpu/cyrix.c | 1 +
arch/x86/kernel/cpu/microcode/intel.c | 3 +++
2 files changed, 4 insertions(+)
diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c
index a70fd61095f8..6f077445647a 100644
--- a/arch/x86/kernel/cpu/cyrix.c
+++ b/arch/x86/kernel/cpu/cyrix.c
@@ -255,6 +255,7 @@ static void init_cyrix(struct cpuinfo_x86 *c)
break;
case 4: /* MediaGX/GXm or Geode GXM/GXLV/GX1 */
+ case 11: /* GX1 with inverted Device ID */
#ifdef CONFIG_PCI
{
u32 vendor, device;
diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c
index afdfd237b59f..f522415bf9e5 100644
--- a/arch/x86/kernel/cpu/microcode/intel.c
+++ b/arch/x86/kernel/cpu/microcode/intel.c
@@ -619,6 +619,9 @@ int __init save_microcode_in_initrd_intel(void)
show_saved_mc();
+ /* initrd is going away, clear patch ptr. */
+ intel_ucode_patch = NULL;
+
return 0;
}
^ permalink raw reply related [flat|nested] 547+ messages in thread
* [GIT PULL] x86 fixes
@ 2017-06-02 6:54 Ingo Molnar
0 siblings, 0 replies; 547+ messages in thread
From: Ingo Molnar @ 2017-06-02 6:54 UTC (permalink / raw)
To: Linus Torvalds
Cc: linux-kernel, Thomas Gleixner, H. Peter Anvin, Peter Zijlstra,
Andrew Morton
Linus,
Please pull the latest x86-urgent-for-linus git tree from:
git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus
# HEAD: c08d517480ea342cc43acdacc5cf4a795e18151d Revert "x86/PAT: Fix Xorg regression on CPUs that don't support PAT"
Misc fixes:
- revert a broken PAT commit that broke a number of systems,
- fix two preemptability warnings/bugs that can trigger under certain
circumstances, in the debug code and in the microcode loader.
Thanks,
Ingo
------------------>
Borislav Petkov (2):
x86/microcode/AMD: Change load_microcode_amd()'s param to bool to fix preemptibility bug
x86/debug/32: Convert a smp_processor_id() call to raw to avoid DEBUG_PREEMPT warning
Ingo Molnar (1):
Revert "x86/PAT: Fix Xorg regression on CPUs that don't support PAT"
arch/x86/kernel/cpu/microcode/amd.c | 16 ++++++++--------
arch/x86/kernel/process_32.c | 2 +-
arch/x86/mm/pat.c | 9 +++------
3 files changed, 12 insertions(+), 15 deletions(-)
diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c
index 45db4d2ebd01..e9f4d762aa5b 100644
--- a/arch/x86/kernel/cpu/microcode/amd.c
+++ b/arch/x86/kernel/cpu/microcode/amd.c
@@ -320,7 +320,7 @@ void load_ucode_amd_ap(unsigned int cpuid_1_eax)
}
static enum ucode_state
-load_microcode_amd(int cpu, u8 family, const u8 *data, size_t size);
+load_microcode_amd(bool save, u8 family, const u8 *data, size_t size);
int __init save_microcode_in_initrd_amd(unsigned int cpuid_1_eax)
{
@@ -338,8 +338,7 @@ int __init save_microcode_in_initrd_amd(unsigned int cpuid_1_eax)
if (!desc.mc)
return -EINVAL;
- ret = load_microcode_amd(smp_processor_id(), x86_family(cpuid_1_eax),
- desc.data, desc.size);
+ ret = load_microcode_amd(true, x86_family(cpuid_1_eax), desc.data, desc.size);
if (ret != UCODE_OK)
return -EINVAL;
@@ -675,7 +674,7 @@ static enum ucode_state __load_microcode_amd(u8 family, const u8 *data,
}
static enum ucode_state
-load_microcode_amd(int cpu, u8 family, const u8 *data, size_t size)
+load_microcode_amd(bool save, u8 family, const u8 *data, size_t size)
{
enum ucode_state ret;
@@ -689,8 +688,8 @@ load_microcode_amd(int cpu, u8 family, const u8 *data, size_t size)
#ifdef CONFIG_X86_32
/* save BSP's matching patch for early load */
- if (cpu_data(cpu).cpu_index == boot_cpu_data.cpu_index) {
- struct ucode_patch *p = find_patch(cpu);
+ if (save) {
+ struct ucode_patch *p = find_patch(0);
if (p) {
memset(amd_ucode_patch, 0, PATCH_MAX_SIZE);
memcpy(amd_ucode_patch, p->data, min_t(u32, ksize(p->data),
@@ -722,11 +721,12 @@ static enum ucode_state request_microcode_amd(int cpu, struct device *device,
{
char fw_name[36] = "amd-ucode/microcode_amd.bin";
struct cpuinfo_x86 *c = &cpu_data(cpu);
+ bool bsp = c->cpu_index == boot_cpu_data.cpu_index;
enum ucode_state ret = UCODE_NFOUND;
const struct firmware *fw;
/* reload ucode container only on the boot cpu */
- if (!refresh_fw || c->cpu_index != boot_cpu_data.cpu_index)
+ if (!refresh_fw || !bsp)
return UCODE_OK;
if (c->x86 >= 0x15)
@@ -743,7 +743,7 @@ static enum ucode_state request_microcode_amd(int cpu, struct device *device,
goto fw_release;
}
- ret = load_microcode_amd(cpu, c->x86, fw->data, fw->size);
+ ret = load_microcode_amd(bsp, c->x86, fw->data, fw->size);
fw_release:
release_firmware(fw);
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index ff40e74c9181..ffeae818aa7a 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -78,7 +78,7 @@ void __show_regs(struct pt_regs *regs, int all)
printk(KERN_DEFAULT "EIP: %pS\n", (void *)regs->ip);
printk(KERN_DEFAULT "EFLAGS: %08lx CPU: %d\n", regs->flags,
- smp_processor_id());
+ raw_smp_processor_id());
printk(KERN_DEFAULT "EAX: %08lx EBX: %08lx ECX: %08lx EDX: %08lx\n",
regs->ax, regs->bx, regs->cx, regs->dx);
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index 83a59a67757a..9b78685b66e6 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -65,11 +65,9 @@ static int __init nopat(char *str)
}
early_param("nopat", nopat);
-static bool __read_mostly __pat_initialized = false;
-
bool pat_enabled(void)
{
- return __pat_initialized;
+ return !!__pat_enabled;
}
EXPORT_SYMBOL_GPL(pat_enabled);
@@ -227,14 +225,13 @@ static void pat_bsp_init(u64 pat)
}
wrmsrl(MSR_IA32_CR_PAT, pat);
- __pat_initialized = true;
__init_cache_modes(pat);
}
static void pat_ap_init(u64 pat)
{
- if (!this_cpu_has(X86_FEATURE_PAT)) {
+ if (!boot_cpu_has(X86_FEATURE_PAT)) {
/*
* If this happens we are on a secondary CPU, but switched to
* PAT on the boot CPU. We have no way to undo PAT.
@@ -309,7 +306,7 @@ void pat_init(void)
u64 pat;
struct cpuinfo_x86 *c = &boot_cpu_data;
- if (!__pat_enabled) {
+ if (!pat_enabled()) {
init_cache_modes();
return;
}
^ permalink raw reply related [flat|nested] 547+ messages in thread
* [GIT PULL] x86 fixes
@ 2017-05-12 7:39 Ingo Molnar
0 siblings, 0 replies; 547+ messages in thread
From: Ingo Molnar @ 2017-05-12 7:39 UTC (permalink / raw)
To: Linus Torvalds
Cc: linux-kernel, Thomas Gleixner, H. Peter Anvin, Peter Zijlstra,
Andrew Morton
Linus,
Please pull the latest x86-urgent-for-linus git tree from:
git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus
# HEAD: fb8fb46c56289b3f34b5d90a4ec65e9e4e4544a5 x86/intel_rdt: Fix a typo in Documentation
It's mostly misc fixes:
- two boot crash fixes
- unwinder fixes
- kexec related kernel direct mappings enhancements/fixes
- more Clang support quirks
- minor cleanups
- Documentation fixes
Thanks,
Ingo
------------------>
Andy Lutomirski (1):
x86/boot/32: Fix UP boot on Quark and possibly other platforms
Baoquan He (1):
x86/mm: Fix boot crash caused by incorrect loop count calculation in sync_global_pgds()
Colin Ian King (1):
x86/microcode/AMD: Remove redundant NULL check on mc
Josh Poimboeuf (1):
x86/asm: Don't use RBP as a temporary register in csum_partial_copy_generic()
Kees Cook (1):
x86/boot: Declare error() as noreturn
Laura Abbott (1):
x86/mm/32: Set the '__vmalloc_start_set' flag in initmem_init()
Matthias Kaehlcke (1):
x86/mm/kaslr: Use the _ASM_MUL macro for multiplication to work around Clang incompatibility
Nick Desaulniers (1):
x86/build: Don't add -maccumulate-outgoing-args w/o compiler support
Xiaochen Shen (1):
x86/intel_rdt: Fix a typo in Documentation
Xunlei Pang (2):
x86/mm: Add support for gbpages to kernel_ident_mapping_init()
x86/kexec/64: Use gbpages for identity mappings if available
Documentation/x86/intel_rdt_ui.txt | 2 +-
arch/x86/Makefile | 3 ++-
arch/x86/boot/compressed/error.h | 4 +++-
arch/x86/boot/compressed/pagetable.c | 2 +-
arch/x86/include/asm/asm.h | 1 +
arch/x86/include/asm/init.h | 3 ++-
arch/x86/kernel/cpu/microcode/amd.c | 2 --
arch/x86/kernel/machine_kexec_64.c | 6 +++++-
arch/x86/kernel/setup.c | 15 +++++++++++++++
arch/x86/kernel/setup_percpu.c | 10 +++++-----
arch/x86/lib/csum-copy_64.S | 12 ++++++------
arch/x86/lib/kaslr.c | 3 ++-
arch/x86/mm/ident_map.c | 14 +++++++++++++-
arch/x86/mm/init_64.c | 12 ++++++------
arch/x86/mm/numa_32.c | 1 +
arch/x86/power/hibernate_64.c | 2 +-
16 files changed, 64 insertions(+), 28 deletions(-)
diff --git a/Documentation/x86/intel_rdt_ui.txt b/Documentation/x86/intel_rdt_ui.txt
index 0f6d8477b66c..c491a1b82de2 100644
--- a/Documentation/x86/intel_rdt_ui.txt
+++ b/Documentation/x86/intel_rdt_ui.txt
@@ -295,7 +295,7 @@ kernel and the tasks running there get 50% of the cache. They should
also get 50% of memory bandwidth assuming that the cores 4-7 are SMT
siblings and only the real time threads are scheduled on the cores 4-7.
-# echo C0 > p0/cpus
+# echo F0 > p0/cpus
4) Locking between applications
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 4430dd489620..5851411e60fb 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -179,7 +179,8 @@ ifdef CONFIG_JUMP_LABEL
endif
ifeq ($(ACCUMULATE_OUTGOING_ARGS), 1)
- KBUILD_CFLAGS += -maccumulate-outgoing-args
+ # This compiler flag is not supported by Clang:
+ KBUILD_CFLAGS += $(call cc-option,-maccumulate-outgoing-args,)
endif
# Stackpointer is addressed different for 32 bit and 64 bit x86
diff --git a/arch/x86/boot/compressed/error.h b/arch/x86/boot/compressed/error.h
index 2e59dac07f9e..d732e608e3af 100644
--- a/arch/x86/boot/compressed/error.h
+++ b/arch/x86/boot/compressed/error.h
@@ -1,7 +1,9 @@
#ifndef BOOT_COMPRESSED_ERROR_H
#define BOOT_COMPRESSED_ERROR_H