This is the 5.4.217 stable release

-----BEGIN PGP SIGNATURE-----
 
 iQIzBAABCAAdFiEEZH8oZUiU471FcZm+ONu9yGCSaT4FAmM/0m4ACgkQONu9yGCS
 aT4WDQ/+KW69exv+g+Yz7p095M1hqjqzyQS4tLBFFkdKWXvUPF2HVCFYn9tOZVFe
 qu8l8tIcbvxAB9ec4YaQvRMYKsnJhJzKQY88+C9Div72DDAN2699ICFUiwQngv7B
 vmIe1oPzxGnDD4cRtaDhH9p0CDqopS1h3x1ctMrU2wPKQQM+AinW56ug0cCvjdmK
 kcIDvG7a8aXc9xOKaqo4naHkk9Qu+x02w9lql25AZDADp+ZHomh7R/OjHoYnRwaR
 1g3Q+yy8iAaWCBGAiyyFcP+kPeSHBjcu2rWoGdw1LimOB6loxX0JRkHF2tnSVSJI
 DQcCkvnFHJQDpOQHG2O9mqvtpsMdgdX8XvJCJFnlnzUFEAPLxxQ6zARhVaYBR8X2
 kC+D/DG7Wk7yhKtBtdXug7fSxwTdPMj02zBvhElri7KUUueoALa2BYF8TOkXLhZa
 R8PL7pMbMaUDfvkLw580PsAbDiVy4XzWKRiK2ZGUe5CjqT7KXxgmzCdxXYGzQXF9
 FDzZD7++CAQ1qWxEM+WugSFGiDNYqDHdTQmdbb+mci9JdpYhUA0C2ZPwWGF6uKBV
 T84zSwAegXrNL//1ugxRxM/5cv1qAWhLAFzbihjTd27gtRyXFejLpPJTF27VDYN+
 B4JIf2D+wHSFszV1SDBt08D40tnPnYTRQ56dMt9M2Fnaq9H8pwo=
 =3udV
 -----END PGP SIGNATURE-----

Merge 5.4.217 into android11-5.4-lts

Changes in 5.4.217
	Revert "x86/speculation: Add RSB VM Exit protections"
	Revert "x86/cpu: Add a steppings field to struct x86_cpu_id"
	x86/devicetable: Move x86 specific macro out of generic code
	x86/cpu: Add consistent CPU match macros
	x86/cpu: Add a steppings field to struct x86_cpu_id
	x86/kvm/vmx: Make noinstr clean
	x86/cpufeatures: Move RETPOLINE flags to word 11
	x86/bugs: Report AMD retbleed vulnerability
	x86/bugs: Add AMD retbleed= boot parameter
	x86/bugs: Keep a per-CPU IA32_SPEC_CTRL value
	x86/entry: Remove skip_r11rcx
	x86/entry: Add kernel IBRS implementation
	x86/bugs: Optimize SPEC_CTRL MSR writes
	x86/speculation: Add spectre_v2=ibrs option to support Kernel IBRS
	x86/bugs: Split spectre_v2_select_mitigation() and spectre_v2_user_select_mitigation()
	x86/bugs: Report Intel retbleed vulnerability
	intel_idle: Disable IBRS during long idle
	x86/speculation: Change FILL_RETURN_BUFFER to work with objtool
	x86/speculation: Fix RSB filling with CONFIG_RETPOLINE=n
	x86/speculation: Fix firmware entry SPEC_CTRL handling
	x86/speculation: Fix SPEC_CTRL write on SMT state change
	x86/speculation: Use cached host SPEC_CTRL value for guest entry/exit
	x86/speculation: Remove x86_spec_ctrl_mask
	KVM/VMX: Use TEST %REG,%REG instead of CMP $0,%REG in vmenter.S
	KVM/nVMX: Use __vmx_vcpu_run in nested_vmx_check_vmentry_hw
	KVM: VMX: Flatten __vmx_vcpu_run()
	KVM: VMX: Convert launched argument to flags
	KVM: VMX: Prevent guest RSB poisoning attacks with eIBRS
	KVM: VMX: Fix IBRS handling after vmexit
	x86/speculation: Fill RSB on vmexit for IBRS
	x86/common: Stamp out the stepping madness
	x86/cpu/amd: Enumerate BTC_NO
	x86/bugs: Add Cannon lake to RETBleed affected CPU list
	x86/speculation: Disable RRSBA behavior
	x86/speculation: Use DECLARE_PER_CPU for x86_spec_ctrl_current
	x86/bugs: Warn when "ibrs" mitigation is selected on Enhanced IBRS parts
	x86/speculation: Add RSB VM Exit protections
	xfs: fix misuse of the XFS_ATTR_INCOMPLETE flag
	xfs: introduce XFS_MAX_FILEOFF
	xfs: truncate should remove all blocks, not just to the end of the page cache
	xfs: fix s_maxbytes computation on 32-bit kernels
	xfs: fix IOCB_NOWAIT handling in xfs_file_dio_aio_read
	xfs: refactor remote attr value buffer invalidation
	xfs: fix memory corruption during remote attr value buffer invalidation
	xfs: move incore structures out of xfs_da_format.h
	xfs: streamline xfs_attr3_leaf_inactive
	xfs: fix uninitialized variable in xfs_attr3_leaf_inactive
	xfs: remove unused variable 'done'
	Revert "drm/amdgpu: use dirty framebuffer helper"
	Makefile.extrawarn: Move -Wcast-function-type-strict to W=1
	docs: update mediator information in CoC docs
	Linux 5.4.217

Signed-off-by: Greg Kroah-Hartman <gregkh@google.com>
Change-Id: I105690ca9b48507da0590cb62dd60db2a95698e6
commit c01b171cd9
Greg Kroah-Hartman, 2022-10-08 08:39:36 +02:00
51 changed files with 1058 additions and 560 deletions


@ -4304,6 +4304,18 @@
retain_initrd [RAM] Keep initrd memory after extraction
retbleed= [X86] Control mitigation of RETBleed (Arbitrary
Speculative Code Execution with Return Instructions)
vulnerability.
off - unconditionally disable
auto - automatically select a mitigation
Selecting 'auto' will choose a mitigation method at run
time according to the CPU.
Not specifying this option is equivalent to retbleed=auto.
rfkill.default_state=
0 "airplane mode". All wifi, bluetooth, wimax, gps, fm,
etc. communication is blocked by default.
@ -4547,6 +4559,7 @@
eibrs - enhanced IBRS
eibrs,retpoline - enhanced IBRS + Retpolines
eibrs,lfence - enhanced IBRS + LFENCE
ibrs - use IBRS to protect kernel
Not specifying this option is equivalent to
spectre_v2=auto.
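
As a usage sketch only (not part of this diff): both switches are ordinary kernel command-line parameters, so a boot entry carrying, say, "retbleed=auto spectre_v2=ibrs" selects the automatic RETBleed handling together with the new kernel-IBRS Spectre v2 mode, while leaving them out is equivalent to retbleed=auto and spectre_v2=auto as documented above.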


@ -51,7 +51,7 @@ the Technical Advisory Board (TAB) or other maintainers if you're
uncertain how to handle situations that come up. It will not be
considered a violation report unless you want it to be. If you are
uncertain about approaching the TAB or any other maintainers, please
reach out to our conflict mediator, Mishi Choudhary <mishi@linux.com>.
reach out to our conflict mediator, Joanna Lee <joanna.lee@gesmer.com>.
In the end, "be kind to each other" is really what the end goal is for
everybody. We know everyone is human and we all fail at times, but the


@ -1,7 +1,7 @@
# SPDX-License-Identifier: GPL-2.0
VERSION = 5
PATCHLEVEL = 4
SUBLEVEL = 216
SUBLEVEL = 217
EXTRAVERSION =
NAME = Kleptomaniac Octopus


@ -6,6 +6,8 @@
#include <asm/percpu.h>
#include <asm/asm-offsets.h>
#include <asm/processor-flags.h>
#include <asm/msr.h>
#include <asm/nospec-branch.h>
/*
@ -146,27 +148,19 @@ For 32-bit we have the following conventions - kernel is built with
.endm
.macro POP_REGS pop_rdi=1 skip_r11rcx=0
.macro POP_REGS pop_rdi=1
popq %r15
popq %r14
popq %r13
popq %r12
popq %rbp
popq %rbx
.if \skip_r11rcx
popq %rsi
.else
popq %r11
.endif
popq %r10
popq %r9
popq %r8
popq %rax
.if \skip_r11rcx
popq %rsi
.else
popq %rcx
.endif
popq %rdx
popq %rsi
.if \pop_rdi
@ -316,6 +310,62 @@ For 32-bit we have the following conventions - kernel is built with
#endif
/*
* IBRS kernel mitigation for Spectre_v2.
*
* Assumes full context is established (PUSH_REGS, CR3 and GS) and it clobbers
* the regs it uses (AX, CX, DX). Must be called before the first RET
* instruction (NOTE! UNTRAIN_RET includes a RET instruction)
*
* The optional argument is used to save/restore the current value,
* which is used on the paranoid paths.
*
* Assumes x86_spec_ctrl_{base,current} to have SPEC_CTRL_IBRS set.
*/
.macro IBRS_ENTER save_reg
ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_KERNEL_IBRS
movl $MSR_IA32_SPEC_CTRL, %ecx
.ifnb \save_reg
rdmsr
shl $32, %rdx
or %rdx, %rax
mov %rax, \save_reg
test $SPEC_CTRL_IBRS, %eax
jz .Ldo_wrmsr_\@
lfence
jmp .Lend_\@
.Ldo_wrmsr_\@:
.endif
movq PER_CPU_VAR(x86_spec_ctrl_current), %rdx
movl %edx, %eax
shr $32, %rdx
wrmsr
.Lend_\@:
.endm
/*
* Similar to IBRS_ENTER, requires KERNEL GS,CR3 and clobbers (AX, CX, DX)
* regs. Must be called after the last RET.
*/
.macro IBRS_EXIT save_reg
ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_KERNEL_IBRS
movl $MSR_IA32_SPEC_CTRL, %ecx
.ifnb \save_reg
mov \save_reg, %rdx
.else
movq PER_CPU_VAR(x86_spec_ctrl_current), %rdx
andl $(~SPEC_CTRL_IBRS), %edx
.endif
movl %edx, %eax
shr $32, %rdx
wrmsr
.Lend_\@:
.endm
/*
* Mitigate Spectre v1 for conditional swapgs code paths.
*
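
For orientation, a rough C-level sketch of what the IBRS_ENTER/IBRS_EXIT fast paths above amount to (illustrative only; the helper names are invented here, and the real implementation is the assembly, subject to the placement rules spelled out in its comments):

static __always_inline void ibrs_enter_sketch(void)	/* ~ IBRS_ENTER */
{
	if (cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS))
		native_wrmsrl(MSR_IA32_SPEC_CTRL,
			      this_cpu_read(x86_spec_ctrl_current));
}

static __always_inline void ibrs_exit_sketch(void)	/* ~ IBRS_EXIT */
{
	if (cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS))
		native_wrmsrl(MSR_IA32_SPEC_CTRL,
			      this_cpu_read(x86_spec_ctrl_current) &
			      ~SPEC_CTRL_IBRS);
}

The save_reg variants additionally stash and later restore the previous MSR value, which is what the paranoid NMI/#MC paths in entry_64.S below rely on.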


@ -750,7 +750,6 @@ ENTRY(__switch_to_asm)
movl %ebx, PER_CPU_VAR(stack_canary)+stack_canary_offset
#endif
#ifdef CONFIG_RETPOLINE
/*
* When switching from a shallower to a deeper call stack
* the RSB may either underflow or use entries populated
@ -759,7 +758,6 @@ ENTRY(__switch_to_asm)
* speculative execution to prevent attack.
*/
FILL_RETURN_BUFFER %ebx, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
#endif
/* restore callee-saved registers */
popfl


@ -172,6 +172,10 @@ GLOBAL(entry_SYSCALL_64_after_hwframe)
/* IRQs are off. */
movq %rax, %rdi
movq %rsp, %rsi
/* clobbers %rax, make sure it is after saving the syscall nr */
IBRS_ENTER
call do_syscall_64 /* returns with IRQs disabled */
TRACE_IRQS_IRETQ /* we're about to change IF */
@ -248,8 +252,8 @@ GLOBAL(entry_SYSCALL_64_after_hwframe)
* perf profiles. Nothing jumps here.
*/
syscall_return_via_sysret:
/* rcx and r11 are already restored (see code above) */
POP_REGS pop_rdi=0 skip_r11rcx=1
IBRS_EXIT
POP_REGS pop_rdi=0
/*
* Now all regs are restored except RSP and RDI.
@ -301,7 +305,6 @@ ENTRY(__switch_to_asm)
movq %rbx, PER_CPU_VAR(fixed_percpu_data) + stack_canary_offset
#endif
#ifdef CONFIG_RETPOLINE
/*
* When switching from a shallower to a deeper call stack
* the RSB may either underflow or use entries populated
@ -310,7 +313,6 @@ ENTRY(__switch_to_asm)
* speculative execution to prevent attack.
*/
FILL_RETURN_BUFFER %r12, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
#endif
/* restore callee-saved registers */
popq %r15
@ -622,6 +624,7 @@ GLOBAL(retint_user)
TRACE_IRQS_IRETQ
GLOBAL(swapgs_restore_regs_and_return_to_usermode)
IBRS_EXIT
#ifdef CONFIG_DEBUG_ENTRY
/* Assert that pt_regs indicates user mode. */
testb $3, CS(%rsp)
@ -1248,7 +1251,13 @@ ENTRY(paranoid_entry)
*/
FENCE_SWAPGS_KERNEL_ENTRY
ret
/*
* Once we have CR3 and %GS setup save and set SPEC_CTRL. Just like
* CR3 above, keep the old value in a callee saved register.
*/
IBRS_ENTER save_reg=%r15
RET
END(paranoid_entry)
/*
@ -1276,12 +1285,20 @@ ENTRY(paranoid_exit)
jmp .Lparanoid_exit_restore
.Lparanoid_exit_no_swapgs:
TRACE_IRQS_IRETQ_DEBUG
/*
* Must restore IBRS state before both CR3 and %GS since we need access
* to the per-CPU x86_spec_ctrl_shadow variable.
*/
IBRS_EXIT save_reg=%r15
/* Always restore stashed CR3 value (see paranoid_entry) */
RESTORE_CR3 scratch_reg=%rbx save_reg=%r14
.Lparanoid_exit_restore:
jmp restore_regs_and_return_to_kernel
END(paranoid_exit)
/*
* Save all registers in pt_regs, and switch GS if needed.
*/
@ -1301,6 +1318,7 @@ ENTRY(error_entry)
FENCE_SWAPGS_USER_ENTRY
/* We have user CR3. Change to kernel CR3. */
SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
IBRS_ENTER
.Lerror_entry_from_usermode_after_swapgs:
/* Put us onto the real thread stack. */
@ -1356,6 +1374,7 @@ ENTRY(error_entry)
SWAPGS
FENCE_SWAPGS_USER_ENTRY
SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
IBRS_ENTER
/*
* Pretend that the exception came from user mode: set up pt_regs
@ -1461,6 +1480,8 @@ ENTRY(nmi)
PUSH_AND_CLEAR_REGS rdx=(%rdx)
ENCODE_FRAME_POINTER
IBRS_ENTER
/*
* At this point we no longer need to worry about stack damage
* due to nesting -- we're on the normal thread stack and we're
@ -1684,6 +1705,9 @@ end_repeat_nmi:
movq $-1, %rsi
call do_nmi
/* Always restore stashed SPEC_CTRL value (see paranoid_entry) */
IBRS_EXIT save_reg=%r15
/* Always restore stashed CR3 value (see paranoid_entry) */
RESTORE_CR3 scratch_reg=%r15 save_reg=%r14


@ -4,7 +4,6 @@
*
* Copyright 2000-2002 Andi Kleen, SuSE Labs.
*/
#include "calling.h"
#include <asm/asm-offsets.h>
#include <asm/current.h>
#include <asm/errno.h>
@ -17,6 +16,8 @@
#include <linux/linkage.h>
#include <linux/err.h>
#include "calling.h"
.section .entry.text, "ax"
/*
@ -106,6 +107,8 @@ ENTRY(entry_SYSENTER_compat)
xorl %r15d, %r15d /* nospec r15 */
cld
IBRS_ENTER
/*
* SYSENTER doesn't filter flags, so we need to clear NT and AC
* ourselves. To save a few cycles, we can check whether
@ -253,6 +256,8 @@ GLOBAL(entry_SYSCALL_compat_after_hwframe)
*/
TRACE_IRQS_OFF
IBRS_ENTER
movq %rsp, %rdi
call do_fast_syscall_32
/* XEN PV guests always use IRET path */
@ -267,6 +272,9 @@ sysret32_from_system_call:
*/
STACKLEAK_ERASE
TRACE_IRQS_ON /* User mode traces as IRQs on. */
IBRS_EXIT
movq RBX(%rsp), %rbx /* pt_regs->rbx */
movq RBP(%rsp), %rbp /* pt_regs->rbp */
movq EFLAGS(%rsp), %r11 /* pt_regs->flags (in r11) */
@ -408,6 +416,7 @@ ENTRY(entry_INT80_compat)
* gate turned them off.
*/
TRACE_IRQS_OFF
IBRS_ENTER
movq %rsp, %rdi
call do_int80_syscall_32


@ -5,15 +5,22 @@
/*
* Declare drivers belonging to specific x86 CPUs
* Similar in spirit to pci_device_id and related PCI functions
*
* The wildcard initializers are in mod_devicetable.h because
* file2alias needs them. Sigh.
*/
#include <linux/mod_devicetable.h>
/* Get the INTEL_FAM* model defines */
#include <asm/intel-family.h>
/* And the X86_VENDOR_* ones */
#include <asm/processor.h>
/* Centaur FAM6 models */
#define X86_CENTAUR_FAM6_C7_A 0xa
#define X86_CENTAUR_FAM6_C7_D 0xd
#define X86_CENTAUR_FAM6_NANO 0xf
#define X86_STEPPINGS(mins, maxs) GENMASK(maxs, mins)
/**
* X86_MATCH_VENDOR_FAM_MODEL_STEPPINGS_FEATURE - Base macro for CPU matching
* @_vendor: The vendor name, e.g. INTEL, AMD, HYGON, ..., ANY
@ -26,8 +33,11 @@
* format is unsigned long. The supplied value, pointer
* etc. is casted to unsigned long internally.
*
* Backport version to keep the SRBDS pile consistent. No shorter variants
* required for this.
* Use only if you need all selectors. Otherwise use one of the shorter
* macros of the X86_MATCH_* family. If there is no matching shorthand
* macro, consider to add one. If you really need to wrap one of the macros
* into another macro at the usage site for good reasons, then please
* start this local macro with X86_MATCH to allow easy grepping.
*/
#define X86_MATCH_VENDOR_FAM_MODEL_STEPPINGS_FEATURE(_vendor, _family, _model, \
_steppings, _feature, _data) { \
@ -39,6 +49,120 @@
.driver_data = (unsigned long) _data \
}
/**
* X86_MATCH_VENDOR_FAM_MODEL_FEATURE - Macro for CPU matching
* @_vendor: The vendor name, e.g. INTEL, AMD, HYGON, ..., ANY
* The name is expanded to X86_VENDOR_@_vendor
* @_family: The family number or X86_FAMILY_ANY
* @_model: The model number, model constant or X86_MODEL_ANY
* @_feature: A X86_FEATURE bit or X86_FEATURE_ANY
* @_data: Driver specific data or NULL. The internal storage
* format is unsigned long. The supplied value, pointer
* etc. is casted to unsigned long internally.
*
* The steppings arguments of X86_MATCH_VENDOR_FAM_MODEL_STEPPINGS_FEATURE() is
* set to wildcards.
*/
#define X86_MATCH_VENDOR_FAM_MODEL_FEATURE(vendor, family, model, feature, data) \
X86_MATCH_VENDOR_FAM_MODEL_STEPPINGS_FEATURE(vendor, family, model, \
X86_STEPPING_ANY, feature, data)
/**
* X86_MATCH_VENDOR_FAM_FEATURE - Macro for matching vendor, family and CPU feature
* @vendor: The vendor name, e.g. INTEL, AMD, HYGON, ..., ANY
* The name is expanded to X86_VENDOR_@vendor
* @family: The family number or X86_FAMILY_ANY
* @feature: A X86_FEATURE bit
* @data: Driver specific data or NULL. The internal storage
* format is unsigned long. The supplied value, pointer
* etc. is casted to unsigned long internally.
*
* All other missing arguments of X86_MATCH_VENDOR_FAM_MODEL_FEATURE() are
* set to wildcards.
*/
#define X86_MATCH_VENDOR_FAM_FEATURE(vendor, family, feature, data) \
X86_MATCH_VENDOR_FAM_MODEL_FEATURE(vendor, family, \
X86_MODEL_ANY, feature, data)
/**
* X86_MATCH_VENDOR_FEATURE - Macro for matching vendor and CPU feature
* @vendor: The vendor name, e.g. INTEL, AMD, HYGON, ..., ANY
* The name is expanded to X86_VENDOR_@vendor
* @feature: A X86_FEATURE bit
* @data: Driver specific data or NULL. The internal storage
* format is unsigned long. The supplied value, pointer
* etc. is casted to unsigned long internally.
*
* All other missing arguments of X86_MATCH_VENDOR_FAM_MODEL_FEATURE() are
* set to wildcards.
*/
#define X86_MATCH_VENDOR_FEATURE(vendor, feature, data) \
X86_MATCH_VENDOR_FAM_FEATURE(vendor, X86_FAMILY_ANY, feature, data)
/**
* X86_MATCH_FEATURE - Macro for matching a CPU feature
* @feature: A X86_FEATURE bit
* @data: Driver specific data or NULL. The internal storage
* format is unsigned long. The supplied value, pointer
* etc. is casted to unsigned long internally.
*
* All other missing arguments of X86_MATCH_VENDOR_FAM_MODEL_FEATURE() are
* set to wildcards.
*/
#define X86_MATCH_FEATURE(feature, data) \
X86_MATCH_VENDOR_FEATURE(ANY, feature, data)
/* Transitional to keep the existing code working */
#define X86_FEATURE_MATCH(feature) X86_MATCH_FEATURE(feature, NULL)
/**
* X86_MATCH_VENDOR_FAM_MODEL - Match vendor, family and model
* @vendor: The vendor name, e.g. INTEL, AMD, HYGON, ..., ANY
* The name is expanded to X86_VENDOR_@vendor
* @family: The family number or X86_FAMILY_ANY
* @model: The model number, model constant or X86_MODEL_ANY
* @data: Driver specific data or NULL. The internal storage
* format is unsigned long. The supplied value, pointer
* etc. is casted to unsigned long internally.
*
* All other missing arguments of X86_MATCH_VENDOR_FAM_MODEL_FEATURE() are
* set to wildcards.
*/
#define X86_MATCH_VENDOR_FAM_MODEL(vendor, family, model, data) \
X86_MATCH_VENDOR_FAM_MODEL_FEATURE(vendor, family, model, \
X86_FEATURE_ANY, data)
/**
* X86_MATCH_VENDOR_FAM - Match vendor and family
* @vendor: The vendor name, e.g. INTEL, AMD, HYGON, ..., ANY
* The name is expanded to X86_VENDOR_@vendor
* @family: The family number or X86_FAMILY_ANY
* @data: Driver specific data or NULL. The internal storage
* format is unsigned long. The supplied value, pointer
* etc. is casted to unsigned long internally.
*
* All other missing arguments to X86_MATCH_VENDOR_FAM_MODEL_FEATURE() are
* set to wildcards.
*/
#define X86_MATCH_VENDOR_FAM(vendor, family, data) \
X86_MATCH_VENDOR_FAM_MODEL(vendor, family, X86_MODEL_ANY, data)
/**
* X86_MATCH_INTEL_FAM6_MODEL - Match vendor INTEL, family 6 and model
* @model: The model name without the INTEL_FAM6_ prefix or ANY
* The model name is expanded to INTEL_FAM6_@model internally
* @data: Driver specific data or NULL. The internal storage
* format is unsigned long. The supplied value, pointer
* etc. is casted to unsigned long internally.
*
* The vendor is set to INTEL, the family to 6 and all other missing
* arguments of X86_MATCH_VENDOR_FAM_MODEL_FEATURE() are set to wildcards.
*
* See X86_MATCH_VENDOR_FAM_MODEL_FEATURE() for further information.
*/
#define X86_MATCH_INTEL_FAM6_MODEL(model, data) \
X86_MATCH_VENDOR_FAM_MODEL(INTEL, 6, INTEL_FAM6_##model, data)
/*
* Match specific microcode revisions.
*
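
A hypothetical consumer of the X86_MATCH_* shorthands above could look like this (driver-side sketch only; the table name, the models and the feature picked here are illustrative, not taken from this merge):

#include <linux/module.h>
#include <linux/mod_devicetable.h>
#include <asm/cpu_device_id.h>

static const struct x86_cpu_id my_cpu_ids[] = {
	X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE, NULL),
	X86_MATCH_VENDOR_FAM_MODEL(AMD, 0x17, X86_MODEL_ANY, NULL),
	X86_MATCH_FEATURE(X86_FEATURE_APERFMPERF, NULL),
	{}
};
MODULE_DEVICE_TABLE(x86cpu, my_cpu_ids);

static int __init my_driver_init(void)
{
	if (!x86_match_cpu(my_cpu_ids))	/* returns first matching entry or NULL */
		return -ENODEV;
	return 0;
}

Since x86_match_cpu() hands back the matching table entry, driver_data can be recovered from it when the entries carry real data instead of NULL.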


@ -203,8 +203,8 @@
#define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
#define X86_FEATURE_SME ( 7*32+10) /* AMD Secure Memory Encryption */
#define X86_FEATURE_PTI ( 7*32+11) /* Kernel Page Table Isolation enabled */
#define X86_FEATURE_RETPOLINE ( 7*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */
#define X86_FEATURE_RETPOLINE_LFENCE ( 7*32+13) /* "" Use LFENCE for Spectre variant 2 */
#define X86_FEATURE_KERNEL_IBRS ( 7*32+12) /* "" Set/clear IBRS on kernel entry/exit */
#define X86_FEATURE_RSB_VMEXIT ( 7*32+13) /* "" Fill RSB on VM-Exit */
#define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */
#define X86_FEATURE_CDP_L2 ( 7*32+15) /* Code and Data Prioritization L2 */
#define X86_FEATURE_MSR_SPEC_CTRL ( 7*32+16) /* "" MSR SPEC_CTRL is implemented */
@ -286,7 +286,10 @@
#define X86_FEATURE_CQM_MBM_LOCAL (11*32+ 3) /* LLC Local MBM monitoring */
#define X86_FEATURE_FENCE_SWAPGS_USER (11*32+ 4) /* "" LFENCE in user entry SWAPGS path */
#define X86_FEATURE_FENCE_SWAPGS_KERNEL (11*32+ 5) /* "" LFENCE in kernel entry SWAPGS path */
#define X86_FEATURE_RSB_VMEXIT_LITE (11*32+ 6) /* "" Fill RSB on VM exit when EIBRS is enabled */
#define X86_FEATURE_RRSBA_CTRL (11*32+11) /* "" RET prediction control */
#define X86_FEATURE_RETPOLINE (11*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */
#define X86_FEATURE_RETPOLINE_LFENCE (11*32+13) /* "" Use LFENCE for Spectre variant 2 */
#define X86_FEATURE_RSB_VMEXIT_LITE (11*32+17) /* "" Fill RSB on VM exit when EIBRS is enabled */
/* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */
#define X86_FEATURE_AVX512_BF16 (12*32+ 5) /* AVX512 BFLOAT16 instructions */
@ -303,6 +306,7 @@
#define X86_FEATURE_AMD_SSBD (13*32+24) /* "" Speculative Store Bypass Disable */
#define X86_FEATURE_VIRT_SSBD (13*32+25) /* Virtualized Speculative Store Bypass Disable */
#define X86_FEATURE_AMD_SSB_NO (13*32+26) /* "" Speculative Store Bypass is fixed in hardware. */
#define X86_FEATURE_BTC_NO (13*32+29) /* "" Not vulnerable to Branch Type Confusion */
/* Thermal and Power Management Leaf, CPUID level 0x00000006 (EAX), word 14 */
#define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */
@ -407,7 +411,8 @@
#define X86_BUG_ITLB_MULTIHIT X86_BUG(23) /* CPU may incur MCE during certain page attribute changes */
#define X86_BUG_SRBDS X86_BUG(24) /* CPU may leak RNG bits if not mitigated */
#define X86_BUG_MMIO_STALE_DATA X86_BUG(25) /* CPU is affected by Processor MMIO Stale Data vulnerabilities */
#define X86_BUG_MMIO_UNKNOWN X86_BUG(26) /* CPU is too old and its MMIO Stale Data status is unknown */
#define X86_BUG_RETBLEED X86_BUG(26) /* CPU is affected by RETBleed */
#define X86_BUG_EIBRS_PBRSB X86_BUG(27) /* EIBRS is vulnerable to Post Barrier RSB Predictions */
#define X86_BUG_MMIO_UNKNOWN X86_BUG(28) /* CPU is too old and its MMIO Stale Data status is unknown */
#endif /* _ASM_X86_CPUFEATURES_H */


@ -35,6 +35,9 @@
* The #define line may optionally include a comment including platform names.
*/
/* Wildcard match for FAM6 so X86_MATCH_INTEL_FAM6_MODEL(ANY) works */
#define INTEL_FAM6_ANY X86_MODEL_ANY
#define INTEL_FAM6_CORE_YONAH 0x0E
#define INTEL_FAM6_CORE2_MEROM 0x0F
@ -126,6 +129,9 @@
#define INTEL_FAM6_XEON_PHI_KNL 0x57 /* Knights Landing */
#define INTEL_FAM6_XEON_PHI_KNM 0x85 /* Knights Mill */
/* Family 5 */
#define INTEL_FAM5_QUARK_X1000 0x09 /* Quark X1000 SoC */
/* Useful macros */
#define INTEL_CPU_FAM_ANY(_family, _model, _driver_data) \
{ \


@ -47,6 +47,8 @@
#define SPEC_CTRL_STIBP BIT(SPEC_CTRL_STIBP_SHIFT) /* STIBP mask */
#define SPEC_CTRL_SSBD_SHIFT 2 /* Speculative Store Bypass Disable bit */
#define SPEC_CTRL_SSBD BIT(SPEC_CTRL_SSBD_SHIFT) /* Speculative Store Bypass Disable */
#define SPEC_CTRL_RRSBA_DIS_S_SHIFT 6 /* Disable RRSBA behavior */
#define SPEC_CTRL_RRSBA_DIS_S BIT(SPEC_CTRL_RRSBA_DIS_S_SHIFT)
#define MSR_IA32_PRED_CMD 0x00000049 /* Prediction Command */
#define PRED_CMD_IBPB BIT(0) /* Indirect Branch Prediction Barrier */
@ -82,6 +84,7 @@
#define MSR_IA32_ARCH_CAPABILITIES 0x0000010a
#define ARCH_CAP_RDCL_NO BIT(0) /* Not susceptible to Meltdown */
#define ARCH_CAP_IBRS_ALL BIT(1) /* Enhanced IBRS support */
#define ARCH_CAP_RSBA BIT(2) /* RET may use alternative branch predictors */
#define ARCH_CAP_SKIP_VMENTRY_L1DFLUSH BIT(3) /* Skip L1D flush on vmentry */
#define ARCH_CAP_SSB_NO BIT(4) /*
* Not susceptible to Speculative Store Bypass
@ -129,6 +132,13 @@
* bit available to control VERW
* behavior.
*/
#define ARCH_CAP_RRSBA BIT(19) /*
* Indicates RET may use predictors
* other than the RSB. With eIBRS
* enabled predictions in kernel mode
* are restricted to targets in
* kernel.
*/
#define ARCH_CAP_PBRSB_NO BIT(24) /*
* Not susceptible to Post-Barrier
* Return Stack Buffer Predictions.


@ -4,11 +4,14 @@
#define _ASM_X86_NOSPEC_BRANCH_H_
#include <linux/static_key.h>
#include <linux/frame.h>
#include <asm/alternative.h>
#include <asm/alternative-asm.h>
#include <asm/cpufeatures.h>
#include <asm/msr-index.h>
#include <asm/unwind_hints.h>
#include <asm/percpu.h>
/*
* This should be used immediately before a retpoline alternative. It tells
@ -60,9 +63,9 @@
lfence; \
jmp 775b; \
774: \
add $(BITS_PER_LONG/8) * 2, sp; \
dec reg; \
jnz 771b; \
add $(BITS_PER_LONG/8) * nr, sp; \
/* barrier for jnz misprediction */ \
lfence;
#else
@ -79,13 +82,6 @@
add $(BITS_PER_LONG/8) * nr, sp;
#endif
#define __ISSUE_UNBALANCED_RET_GUARD(sp) \
call 881f; \
int3; \
881: \
add $(BITS_PER_LONG/8), sp; \
lfence;
#ifdef __ASSEMBLY__
/*
@ -155,26 +151,28 @@
#endif
.endm
.macro ISSUE_UNBALANCED_RET_GUARD ftr:req
ANNOTATE_NOSPEC_ALTERNATIVE
ALTERNATIVE "jmp .Lskip_pbrsb_\@", \
__stringify(__ISSUE_UNBALANCED_RET_GUARD(%_ASM_SP)) \
\ftr
.Lskip_pbrsb_\@:
.macro ISSUE_UNBALANCED_RET_GUARD
call .Lunbalanced_ret_guard_\@
int3
.Lunbalanced_ret_guard_\@:
add $(BITS_PER_LONG/8), %_ASM_SP
lfence
.endm
/*
* A simpler FILL_RETURN_BUFFER macro. Don't make people use the CPP
* monstrosity above, manually.
*/
.macro FILL_RETURN_BUFFER reg:req nr:req ftr:req
#ifdef CONFIG_RETPOLINE
ANNOTATE_NOSPEC_ALTERNATIVE
ALTERNATIVE "jmp .Lskip_rsb_\@", \
__stringify(__FILL_RETURN_BUFFER(\reg,\nr,%_ASM_SP)) \
\ftr
.macro FILL_RETURN_BUFFER reg:req nr:req ftr:req ftr2
.ifb \ftr2
ALTERNATIVE "jmp .Lskip_rsb_\@", "", \ftr
.else
ALTERNATIVE_2 "jmp .Lskip_rsb_\@", "", \ftr, "jmp .Lunbalanced_\@", \ftr2
.endif
__FILL_RETURN_BUFFER(\reg,\nr,%_ASM_SP)
.Lunbalanced_\@:
ISSUE_UNBALANCED_RET_GUARD
.Lskip_rsb_\@:
#endif
.endm
#else /* __ASSEMBLY__ */
@ -249,6 +247,7 @@ enum spectre_v2_mitigation {
SPECTRE_V2_EIBRS,
SPECTRE_V2_EIBRS_RETPOLINE,
SPECTRE_V2_EIBRS_LFENCE,
SPECTRE_V2_IBRS,
};
/* The indirect branch speculation control variants */
@ -312,6 +311,9 @@ static inline void indirect_branch_prediction_barrier(void)
/* The Intel SPEC CTRL MSR base value cache */
extern u64 x86_spec_ctrl_base;
DECLARE_PER_CPU(u64, x86_spec_ctrl_current);
extern void write_spec_ctrl_current(u64 val, bool force);
extern u64 spec_ctrl_current(void);
/*
* With retpoline, we must use IBRS to restrict branch prediction
@ -321,18 +323,16 @@ extern u64 x86_spec_ctrl_base;
*/
#define firmware_restrict_branch_speculation_start() \
do { \
u64 val = x86_spec_ctrl_base | SPEC_CTRL_IBRS; \
\
preempt_disable(); \
alternative_msr_write(MSR_IA32_SPEC_CTRL, val, \
alternative_msr_write(MSR_IA32_SPEC_CTRL, \
spec_ctrl_current() | SPEC_CTRL_IBRS, \
X86_FEATURE_USE_IBRS_FW); \
} while (0)
#define firmware_restrict_branch_speculation_end() \
do { \
u64 val = x86_spec_ctrl_base; \
\
alternative_msr_write(MSR_IA32_SPEC_CTRL, val, \
alternative_msr_write(MSR_IA32_SPEC_CTRL, \
spec_ctrl_current(), \
X86_FEATURE_USE_IBRS_FW); \
preempt_enable(); \
} while (0)
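
These wrappers are meant to bracket firmware calls; a minimal usage sketch, with firmware_service_call() standing in for whatever EFI/APM service the caller really invokes (a placeholder, not a real API):

static int call_firmware_sketch(void)
{
	int status;

	firmware_restrict_branch_speculation_start();	/* disables preemption, sets IBRS */
	status = firmware_service_call();		/* hypothetical firmware entry point */
	firmware_restrict_branch_speculation_end();	/* restores SPEC_CTRL, re-enables preemption */

	return status;
}

With this change the bracketing writes spec_ctrl_current() | SPEC_CTRL_IBRS rather than a value derived from x86_spec_ctrl_base, so per-task bits such as STIBP/SSBD are preserved across the firmware call.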


@ -894,12 +894,21 @@ static void init_amd_zn(struct cpuinfo_x86 *c)
node_reclaim_distance = 32;
#endif
/*
* Fix erratum 1076: CPB feature bit not being set in CPUID.
* Always set it, except when running under a hypervisor.
*/
if (!cpu_has(c, X86_FEATURE_HYPERVISOR) && !cpu_has(c, X86_FEATURE_CPB))
set_cpu_cap(c, X86_FEATURE_CPB);
/* Fix up CPUID bits, but only if not virtualised. */
if (!cpu_has(c, X86_FEATURE_HYPERVISOR)) {
/* Erratum 1076: CPB feature bit not being set in CPUID. */
if (!cpu_has(c, X86_FEATURE_CPB))
set_cpu_cap(c, X86_FEATURE_CPB);
/*
* Zen3 (Fam19 model < 0x10) parts are not susceptible to
* Branch Type Confusion, but predate the allocation of the
* BTC_NO bit.
*/
if (c->x86 == 0x19 && !cpu_has(c, X86_FEATURE_BTC_NO))
set_cpu_cap(c, X86_FEATURE_BTC_NO);
}
}
static void init_amd(struct cpuinfo_x86 *c)


@ -37,6 +37,8 @@
static void __init spectre_v1_select_mitigation(void);
static void __init spectre_v2_select_mitigation(void);
static void __init retbleed_select_mitigation(void);
static void __init spectre_v2_user_select_mitigation(void);
static void __init ssb_select_mitigation(void);
static void __init l1tf_select_mitigation(void);
static void __init mds_select_mitigation(void);
@ -46,16 +48,40 @@ static void __init taa_select_mitigation(void);
static void __init mmio_select_mitigation(void);
static void __init srbds_select_mitigation(void);
/* The base value of the SPEC_CTRL MSR that always has to be preserved. */
/* The base value of the SPEC_CTRL MSR without task-specific bits set */
u64 x86_spec_ctrl_base;
EXPORT_SYMBOL_GPL(x86_spec_ctrl_base);
/* The current value of the SPEC_CTRL MSR with task-specific bits set */
DEFINE_PER_CPU(u64, x86_spec_ctrl_current);
EXPORT_SYMBOL_GPL(x86_spec_ctrl_current);
static DEFINE_MUTEX(spec_ctrl_mutex);
/*
* The vendor and possibly platform specific bits which can be modified in
* x86_spec_ctrl_base.
* Keep track of the SPEC_CTRL MSR value for the current task, which may differ
* from x86_spec_ctrl_base due to STIBP/SSB in __speculation_ctrl_update().
*/
static u64 __ro_after_init x86_spec_ctrl_mask = SPEC_CTRL_IBRS;
void write_spec_ctrl_current(u64 val, bool force)
{
if (this_cpu_read(x86_spec_ctrl_current) == val)
return;
this_cpu_write(x86_spec_ctrl_current, val);
/*
* When KERNEL_IBRS this MSR is written on return-to-user, unless
* forced the update can be delayed until that time.
*/
if (force || !cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS))
wrmsrl(MSR_IA32_SPEC_CTRL, val);
}
u64 spec_ctrl_current(void)
{
return this_cpu_read(x86_spec_ctrl_current);
}
EXPORT_SYMBOL_GPL(spec_ctrl_current);
/*
* AMD specific MSR info for Speculative Store Bypass control.
@ -105,13 +131,21 @@ void __init check_bugs(void)
if (boot_cpu_has(X86_FEATURE_MSR_SPEC_CTRL))
rdmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base);
/* Allow STIBP in MSR_SPEC_CTRL if supported */
if (boot_cpu_has(X86_FEATURE_STIBP))
x86_spec_ctrl_mask |= SPEC_CTRL_STIBP;
/* Select the proper CPU mitigations before patching alternatives: */
spectre_v1_select_mitigation();
spectre_v2_select_mitigation();
/*
* retbleed_select_mitigation() relies on the state set by
* spectre_v2_select_mitigation(); specifically it wants to know about
* spectre_v2=ibrs.
*/
retbleed_select_mitigation();
/*
* spectre_v2_user_select_mitigation() relies on the state set by
* retbleed_select_mitigation(); specifically the STIBP selection is
* forced for UNRET.
*/
spectre_v2_user_select_mitigation();
ssb_select_mitigation();
l1tf_select_mitigation();
md_clear_select_mitigation();
@ -151,31 +185,17 @@ void __init check_bugs(void)
#endif
}
/*
* NOTE: For VMX, this function is not called in the vmexit path.
* It uses vmx_spec_ctrl_restore_host() instead.
*/
void
x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest)
{
u64 msrval, guestval, hostval = x86_spec_ctrl_base;
u64 msrval, guestval = guest_spec_ctrl, hostval = spec_ctrl_current();
struct thread_info *ti = current_thread_info();
/* Is MSR_SPEC_CTRL implemented ? */
if (static_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) {
/*
* Restrict guest_spec_ctrl to supported values. Clear the
* modifiable bits in the host base value and or the
* modifiable bits from the guest value.
*/
guestval = hostval & ~x86_spec_ctrl_mask;
guestval |= guest_spec_ctrl & x86_spec_ctrl_mask;
/* SSBD controlled in MSR_SPEC_CTRL */
if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD) ||
static_cpu_has(X86_FEATURE_AMD_SSBD))
hostval |= ssbd_tif_to_spec_ctrl(ti->flags);
/* Conditional STIBP enabled? */
if (static_branch_unlikely(&switch_to_cond_stibp))
hostval |= stibp_tif_to_spec_ctrl(ti->flags);
if (hostval != guestval) {
msrval = setguest ? guestval : hostval;
wrmsrl(MSR_IA32_SPEC_CTRL, msrval);
@ -705,12 +725,103 @@ static int __init nospectre_v1_cmdline(char *str)
}
early_param("nospectre_v1", nospectre_v1_cmdline);
#undef pr_fmt
#define pr_fmt(fmt) "Spectre V2 : " fmt
static enum spectre_v2_mitigation spectre_v2_enabled __ro_after_init =
SPECTRE_V2_NONE;
#undef pr_fmt
#define pr_fmt(fmt) "RETBleed: " fmt
enum retbleed_mitigation {
RETBLEED_MITIGATION_NONE,
RETBLEED_MITIGATION_IBRS,
RETBLEED_MITIGATION_EIBRS,
};
enum retbleed_mitigation_cmd {
RETBLEED_CMD_OFF,
RETBLEED_CMD_AUTO,
};
const char * const retbleed_strings[] = {
[RETBLEED_MITIGATION_NONE] = "Vulnerable",
[RETBLEED_MITIGATION_IBRS] = "Mitigation: IBRS",
[RETBLEED_MITIGATION_EIBRS] = "Mitigation: Enhanced IBRS",
};
static enum retbleed_mitigation retbleed_mitigation __ro_after_init =
RETBLEED_MITIGATION_NONE;
static enum retbleed_mitigation_cmd retbleed_cmd __ro_after_init =
RETBLEED_CMD_AUTO;
static int __init retbleed_parse_cmdline(char *str)
{
if (!str)
return -EINVAL;
if (!strcmp(str, "off"))
retbleed_cmd = RETBLEED_CMD_OFF;
else if (!strcmp(str, "auto"))
retbleed_cmd = RETBLEED_CMD_AUTO;
else
pr_err("Unknown retbleed option (%s). Defaulting to 'auto'\n", str);
return 0;
}
early_param("retbleed", retbleed_parse_cmdline);
#define RETBLEED_UNTRAIN_MSG "WARNING: BTB untrained return thunk mitigation is only effective on AMD/Hygon!\n"
#define RETBLEED_COMPILER_MSG "WARNING: kernel not compiled with RETPOLINE or -mfunction-return capable compiler!\n"
#define RETBLEED_INTEL_MSG "WARNING: Spectre v2 mitigation leaves CPU vulnerable to RETBleed attacks, data leaks possible!\n"
static void __init retbleed_select_mitigation(void)
{
if (!boot_cpu_has_bug(X86_BUG_RETBLEED) || cpu_mitigations_off())
return;
switch (retbleed_cmd) {
case RETBLEED_CMD_OFF:
return;
case RETBLEED_CMD_AUTO:
default:
/*
* The Intel mitigation (IBRS) was already selected in
* spectre_v2_select_mitigation().
*/
break;
}
switch (retbleed_mitigation) {
default:
break;
}
/*
* Let IBRS trump all on Intel without affecting the effects of the
* retbleed= cmdline option.
*/
if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) {
switch (spectre_v2_enabled) {
case SPECTRE_V2_IBRS:
retbleed_mitigation = RETBLEED_MITIGATION_IBRS;
break;
case SPECTRE_V2_EIBRS:
case SPECTRE_V2_EIBRS_RETPOLINE:
case SPECTRE_V2_EIBRS_LFENCE:
retbleed_mitigation = RETBLEED_MITIGATION_EIBRS;
break;
default:
pr_err(RETBLEED_INTEL_MSG);
}
}
pr_info("%s\n", retbleed_strings[retbleed_mitigation]);
}
#undef pr_fmt
#define pr_fmt(fmt) "Spectre V2 : " fmt
static enum spectre_v2_user_mitigation spectre_v2_user_stibp __ro_after_init =
SPECTRE_V2_USER_NONE;
static enum spectre_v2_user_mitigation spectre_v2_user_ibpb __ro_after_init =
@ -740,6 +851,7 @@ static inline const char *spectre_v2_module_string(void) { return ""; }
#define SPECTRE_V2_LFENCE_MSG "WARNING: LFENCE mitigation is not recommended for this CPU, data leaks possible!\n"
#define SPECTRE_V2_EIBRS_EBPF_MSG "WARNING: Unprivileged eBPF is enabled with eIBRS on, data leaks possible via Spectre v2 BHB attacks!\n"
#define SPECTRE_V2_EIBRS_LFENCE_EBPF_SMT_MSG "WARNING: Unprivileged eBPF is enabled with eIBRS+LFENCE mitigation and SMT, data leaks possible via Spectre v2 BHB attacks!\n"
#define SPECTRE_V2_IBRS_PERF_MSG "WARNING: IBRS mitigation selected on Enhanced IBRS CPU, this may cause unnecessary performance loss\n"
#ifdef CONFIG_BPF_SYSCALL
void unpriv_ebpf_notify(int new_state)
@ -781,6 +893,7 @@ enum spectre_v2_mitigation_cmd {
SPECTRE_V2_CMD_EIBRS,
SPECTRE_V2_CMD_EIBRS_RETPOLINE,
SPECTRE_V2_CMD_EIBRS_LFENCE,
SPECTRE_V2_CMD_IBRS,
};
enum spectre_v2_user_cmd {
@ -821,13 +934,15 @@ static void __init spec_v2_user_print_cond(const char *reason, bool secure)
pr_info("spectre_v2_user=%s forced on command line.\n", reason);
}
static __ro_after_init enum spectre_v2_mitigation_cmd spectre_v2_cmd;
static enum spectre_v2_user_cmd __init
spectre_v2_parse_user_cmdline(enum spectre_v2_mitigation_cmd v2_cmd)
spectre_v2_parse_user_cmdline(void)
{
char arg[20];
int ret, i;
switch (v2_cmd) {
switch (spectre_v2_cmd) {
case SPECTRE_V2_CMD_NONE:
return SPECTRE_V2_USER_CMD_NONE;
case SPECTRE_V2_CMD_FORCE:
@ -853,15 +968,16 @@ spectre_v2_parse_user_cmdline(enum spectre_v2_mitigation_cmd v2_cmd)
return SPECTRE_V2_USER_CMD_AUTO;
}
static inline bool spectre_v2_in_eibrs_mode(enum spectre_v2_mitigation mode)
static inline bool spectre_v2_in_ibrs_mode(enum spectre_v2_mitigation mode)
{
return (mode == SPECTRE_V2_EIBRS ||
mode == SPECTRE_V2_EIBRS_RETPOLINE ||
mode == SPECTRE_V2_EIBRS_LFENCE);
return mode == SPECTRE_V2_IBRS ||
mode == SPECTRE_V2_EIBRS ||
mode == SPECTRE_V2_EIBRS_RETPOLINE ||
mode == SPECTRE_V2_EIBRS_LFENCE;
}
static void __init
spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd)
spectre_v2_user_select_mitigation(void)
{
enum spectre_v2_user_mitigation mode = SPECTRE_V2_USER_NONE;
bool smt_possible = IS_ENABLED(CONFIG_SMP);
@ -874,7 +990,7 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd)
cpu_smt_control == CPU_SMT_NOT_SUPPORTED)
smt_possible = false;
cmd = spectre_v2_parse_user_cmdline(v2_cmd);
cmd = spectre_v2_parse_user_cmdline();
switch (cmd) {
case SPECTRE_V2_USER_CMD_NONE:
goto set_mode;
@ -922,12 +1038,12 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd)
}
/*
* If no STIBP, enhanced IBRS is enabled or SMT impossible, STIBP is not
* required.
* If no STIBP, IBRS or enhanced IBRS is enabled, or SMT impossible,
* STIBP is not required.
*/
if (!boot_cpu_has(X86_FEATURE_STIBP) ||
!smt_possible ||
spectre_v2_in_eibrs_mode(spectre_v2_enabled))
spectre_v2_in_ibrs_mode(spectre_v2_enabled))
return;
/*
@ -952,6 +1068,7 @@ static const char * const spectre_v2_strings[] = {
[SPECTRE_V2_EIBRS] = "Mitigation: Enhanced IBRS",
[SPECTRE_V2_EIBRS_LFENCE] = "Mitigation: Enhanced IBRS + LFENCE",
[SPECTRE_V2_EIBRS_RETPOLINE] = "Mitigation: Enhanced IBRS + Retpolines",
[SPECTRE_V2_IBRS] = "Mitigation: IBRS",
};
static const struct {
@ -969,6 +1086,7 @@ static const struct {
{ "eibrs,lfence", SPECTRE_V2_CMD_EIBRS_LFENCE, false },
{ "eibrs,retpoline", SPECTRE_V2_CMD_EIBRS_RETPOLINE, false },
{ "auto", SPECTRE_V2_CMD_AUTO, false },
{ "ibrs", SPECTRE_V2_CMD_IBRS, false },
};
static void __init spec_v2_print_cond(const char *reason, bool secure)
@ -1031,6 +1149,24 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void)
return SPECTRE_V2_CMD_AUTO;
}
if (cmd == SPECTRE_V2_CMD_IBRS && boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) {
pr_err("%s selected but not Intel CPU. Switching to AUTO select\n",
mitigation_options[i].option);
return SPECTRE_V2_CMD_AUTO;
}
if (cmd == SPECTRE_V2_CMD_IBRS && !boot_cpu_has(X86_FEATURE_IBRS)) {
pr_err("%s selected but CPU doesn't have IBRS. Switching to AUTO select\n",
mitigation_options[i].option);
return SPECTRE_V2_CMD_AUTO;
}
if (cmd == SPECTRE_V2_CMD_IBRS && boot_cpu_has(X86_FEATURE_XENPV)) {
pr_err("%s selected but running as XenPV guest. Switching to AUTO select\n",
mitigation_options[i].option);
return SPECTRE_V2_CMD_AUTO;
}
spec_v2_print_cond(mitigation_options[i].option,
mitigation_options[i].secure);
return cmd;
@ -1046,6 +1182,22 @@ static enum spectre_v2_mitigation __init spectre_v2_select_retpoline(void)
return SPECTRE_V2_RETPOLINE;
}
/* Disable in-kernel use of non-RSB RET predictors */
static void __init spec_ctrl_disable_kernel_rrsba(void)
{
u64 ia32_cap;
if (!boot_cpu_has(X86_FEATURE_RRSBA_CTRL))
return;
ia32_cap = x86_read_arch_cap_msr();
if (ia32_cap & ARCH_CAP_RRSBA) {
x86_spec_ctrl_base |= SPEC_CTRL_RRSBA_DIS_S;
write_spec_ctrl_current(x86_spec_ctrl_base, true);
}
}
static void __init spectre_v2_determine_rsb_fill_type_at_vmexit(enum spectre_v2_mitigation mode)
{
/*
@ -1070,10 +1222,6 @@ static void __init spectre_v2_determine_rsb_fill_type_at_vmexit(enum spectre_v2_
*/
switch (mode) {
case SPECTRE_V2_NONE:
/* These modes already fill RSB at vmexit */
case SPECTRE_V2_LFENCE:
case SPECTRE_V2_RETPOLINE:
case SPECTRE_V2_EIBRS_RETPOLINE:
return;
case SPECTRE_V2_EIBRS_LFENCE:
@ -1083,6 +1231,14 @@ static void __init spectre_v2_determine_rsb_fill_type_at_vmexit(enum spectre_v2_
pr_info("Spectre v2 / PBRSB-eIBRS: Retire a single CALL on VMEXIT\n");
}
return;
case SPECTRE_V2_EIBRS_RETPOLINE:
case SPECTRE_V2_RETPOLINE:
case SPECTRE_V2_LFENCE:
case SPECTRE_V2_IBRS:
setup_force_cpu_cap(X86_FEATURE_RSB_VMEXIT);
pr_info("Spectre v2 / SpectreRSB : Filling RSB on VMEXIT\n");
return;
}
pr_warn_once("Unknown Spectre v2 mode, disabling RSB mitigation at VM exit");
@ -1113,6 +1269,14 @@ static void __init spectre_v2_select_mitigation(void)
break;
}
if (boot_cpu_has_bug(X86_BUG_RETBLEED) &&
retbleed_cmd != RETBLEED_CMD_OFF &&
boot_cpu_has(X86_FEATURE_IBRS) &&
boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) {
mode = SPECTRE_V2_IBRS;
break;
}
mode = spectre_v2_select_retpoline();
break;
@ -1129,6 +1293,10 @@ static void __init spectre_v2_select_mitigation(void)
mode = spectre_v2_select_retpoline();
break;
case SPECTRE_V2_CMD_IBRS:
mode = SPECTRE_V2_IBRS;
break;
case SPECTRE_V2_CMD_EIBRS:
mode = SPECTRE_V2_EIBRS;
break;
@ -1145,10 +1313,9 @@ static void __init spectre_v2_select_mitigation(void)
if (mode == SPECTRE_V2_EIBRS && unprivileged_ebpf_enabled())
pr_err(SPECTRE_V2_EIBRS_EBPF_MSG);
if (spectre_v2_in_eibrs_mode(mode)) {
/* Force it so VMEXIT will restore correctly */
if (spectre_v2_in_ibrs_mode(mode)) {
x86_spec_ctrl_base |= SPEC_CTRL_IBRS;
wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base);
write_spec_ctrl_current(x86_spec_ctrl_base, true);
}
switch (mode) {
@ -1156,6 +1323,12 @@ static void __init spectre_v2_select_mitigation(void)
case SPECTRE_V2_EIBRS:
break;
case SPECTRE_V2_IBRS:
setup_force_cpu_cap(X86_FEATURE_KERNEL_IBRS);
if (boot_cpu_has(X86_FEATURE_IBRS_ENHANCED))
pr_warn(SPECTRE_V2_IBRS_PERF_MSG);
break;
case SPECTRE_V2_LFENCE:
case SPECTRE_V2_EIBRS_LFENCE:
setup_force_cpu_cap(X86_FEATURE_RETPOLINE_LFENCE);
@ -1167,16 +1340,56 @@ static void __init spectre_v2_select_mitigation(void)
break;
}
/*
* Disable alternate RSB predictions in kernel when indirect CALLs and
* JMPs gets protection against BHI and Intramode-BTI, but RET
* prediction from a non-RSB predictor is still a risk.
*/
if (mode == SPECTRE_V2_EIBRS_LFENCE ||
mode == SPECTRE_V2_EIBRS_RETPOLINE ||
mode == SPECTRE_V2_RETPOLINE)
spec_ctrl_disable_kernel_rrsba();
spectre_v2_enabled = mode;
pr_info("%s\n", spectre_v2_strings[mode]);
/*
* If spectre v2 protection has been enabled, unconditionally fill
* RSB during a context switch; this protects against two independent
* issues:
* If Spectre v2 protection has been enabled, fill the RSB during a
* context switch. In general there are two types of RSB attacks
* across context switches, for which the CALLs/RETs may be unbalanced.
*
* - RSB underflow (and switch to BTB) on Skylake+
* - SpectreRSB variant of spectre v2 on X86_BUG_SPECTRE_V2 CPUs
* 1) RSB underflow
*
* Some Intel parts have "bottomless RSB". When the RSB is empty,
* speculated return targets may come from the branch predictor,
* which could have a user-poisoned BTB or BHB entry.
*
* AMD has it even worse: *all* returns are speculated from the BTB,
* regardless of the state of the RSB.
*
* When IBRS or eIBRS is enabled, the "user -> kernel" attack
* scenario is mitigated by the IBRS branch prediction isolation
* properties, so the RSB buffer filling wouldn't be necessary to
* protect against this type of attack.
*
* The "user -> user" attack scenario is mitigated by RSB filling.
*
* 2) Poisoned RSB entry
*
* If the 'next' in-kernel return stack is shorter than 'prev',
* 'next' could be tricked into speculating with a user-poisoned RSB
* entry.
*
* The "user -> kernel" attack scenario is mitigated by SMEP and
* eIBRS.
*
* The "user -> user" scenario, also known as SpectreBHB, requires
* RSB clearing.
*
* So to mitigate all cases, unconditionally fill RSB on context
* switches.
*
* FIXME: Is this pointless for retbleed-affected AMD?
*/
setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW);
pr_info("Spectre v2 / SpectreRSB mitigation: Filling RSB on context switch\n");
@ -1184,28 +1397,29 @@ static void __init spectre_v2_select_mitigation(void)
spectre_v2_determine_rsb_fill_type_at_vmexit(mode);
/*
* Retpoline means the kernel is safe because it has no indirect
* branches. Enhanced IBRS protects firmware too, so, enable restricted
* speculation around firmware calls only when Enhanced IBRS isn't
* supported.
* Retpoline protects the kernel, but doesn't protect firmware. IBRS
* and Enhanced IBRS protect firmware too, so enable IBRS around
* firmware calls only when IBRS / Enhanced IBRS aren't otherwise
* enabled.
*
* Use "mode" to check Enhanced IBRS instead of boot_cpu_has(), because
* the user might select retpoline on the kernel command line and if
* the CPU supports Enhanced IBRS, kernel might un-intentionally not
* enable IBRS around firmware calls.
*/
if (boot_cpu_has(X86_FEATURE_IBRS) && !spectre_v2_in_eibrs_mode(mode)) {
if (boot_cpu_has(X86_FEATURE_IBRS) && !spectre_v2_in_ibrs_mode(mode)) {
setup_force_cpu_cap(X86_FEATURE_USE_IBRS_FW);
pr_info("Enabling Restricted Speculation for firmware calls\n");
}
/* Set up IBPB and STIBP depending on the general spectre V2 command */
spectre_v2_user_select_mitigation(cmd);
spectre_v2_cmd = cmd;
}
static void update_stibp_msr(void * __unused)
{
wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base);
u64 val = spec_ctrl_current() | (x86_spec_ctrl_base & SPEC_CTRL_STIBP);
write_spec_ctrl_current(val, true);
}
/* Update x86_spec_ctrl_base in case SMT state changed. */
@ -1421,16 +1635,6 @@ static enum ssb_mitigation __init __ssb_select_mitigation(void)
break;
}
/*
* If SSBD is controlled by the SPEC_CTRL MSR, then set the proper
* bit in the mask to allow guests to use the mitigation even in the
* case where the host does not enable it.
*/
if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD) ||
static_cpu_has(X86_FEATURE_AMD_SSBD)) {
x86_spec_ctrl_mask |= SPEC_CTRL_SSBD;
}
/*
* We have three CPU feature flags that are in play here:
* - X86_BUG_SPEC_STORE_BYPASS - CPU is susceptible.
@ -1448,7 +1652,7 @@ static enum ssb_mitigation __init __ssb_select_mitigation(void)
x86_amd_ssb_disable();
} else {
x86_spec_ctrl_base |= SPEC_CTRL_SSBD;
wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base);
write_spec_ctrl_current(x86_spec_ctrl_base, true);
}
}
@ -1665,7 +1869,7 @@ int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which)
void x86_spec_ctrl_setup_ap(void)
{
if (boot_cpu_has(X86_FEATURE_MSR_SPEC_CTRL))
wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base);
write_spec_ctrl_current(x86_spec_ctrl_base, true);
if (ssb_mode == SPEC_STORE_BYPASS_DISABLE)
x86_amd_ssb_disable();
@ -1900,7 +2104,7 @@ static ssize_t mmio_stale_data_show_state(char *buf)
static char *stibp_state(void)
{
if (spectre_v2_in_eibrs_mode(spectre_v2_enabled))
if (spectre_v2_in_ibrs_mode(spectre_v2_enabled))
return "";
switch (spectre_v2_user_stibp) {
@ -1934,7 +2138,7 @@ static char *pbrsb_eibrs_state(void)
{
if (boot_cpu_has_bug(X86_BUG_EIBRS_PBRSB)) {
if (boot_cpu_has(X86_FEATURE_RSB_VMEXIT_LITE) ||
boot_cpu_has(X86_FEATURE_RETPOLINE))
boot_cpu_has(X86_FEATURE_RSB_VMEXIT))
return ", PBRSB-eIBRS: SW sequence";
else
return ", PBRSB-eIBRS: Vulnerable";
@ -1970,6 +2174,11 @@ static ssize_t srbds_show_state(char *buf)
return sprintf(buf, "%s\n", srbds_strings[srbds_mitigation]);
}
static ssize_t retbleed_show_state(char *buf)
{
return sprintf(buf, "%s\n", retbleed_strings[retbleed_mitigation]);
}
static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr,
char *buf, unsigned int bug)
{
@ -2016,6 +2225,9 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr
case X86_BUG_MMIO_UNKNOWN:
return mmio_stale_data_show_state(buf);
case X86_BUG_RETBLEED:
return retbleed_show_state(buf);
default:
break;
}
@ -2075,4 +2287,9 @@ ssize_t cpu_show_mmio_stale_data(struct device *dev, struct device_attribute *at
else
return cpu_show_common(dev, attr, buf, X86_BUG_MMIO_STALE_DATA);
}
ssize_t cpu_show_retbleed(struct device *dev, struct device_attribute *attr, char *buf)
{
return cpu_show_common(dev, attr, buf, X86_BUG_RETBLEED);
}
#endif
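
For reference, cpu_show_retbleed() is the handler behind the usual CPU vulnerabilities sysfs interface, so a kernel built from this merge is expected to report one of the retbleed_strings[] values above via /sys/devices/system/cpu/vulnerabilities/retbleed, e.g. "Mitigation: Enhanced IBRS" on an affected Intel part running with eIBRS (assuming the standard drivers/base/cpu.c attribute wiring, which is outside this hunk).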


@ -1102,48 +1102,60 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = {
{}
};
#define VULNBL(vendor, family, model, blacklist) \
X86_MATCH_VENDOR_FAM_MODEL(vendor, family, model, blacklist)
#define VULNBL_INTEL_STEPPINGS(model, steppings, issues) \
X86_MATCH_VENDOR_FAM_MODEL_STEPPINGS_FEATURE(INTEL, 6, \
INTEL_FAM6_##model, steppings, \
X86_FEATURE_ANY, issues)
#define VULNBL_AMD(family, blacklist) \
VULNBL(AMD, family, X86_MODEL_ANY, blacklist)
#define VULNBL_HYGON(family, blacklist) \
VULNBL(HYGON, family, X86_MODEL_ANY, blacklist)
#define SRBDS BIT(0)
/* CPU is affected by X86_BUG_MMIO_STALE_DATA */
#define MMIO BIT(1)
/* CPU is affected by Shared Buffers Data Sampling (SBDS), a variant of X86_BUG_MMIO_STALE_DATA */
#define MMIO_SBDS BIT(2)
/* CPU is affected by RETbleed, speculating where you would not expect it */
#define RETBLEED BIT(3)
static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = {
VULNBL_INTEL_STEPPINGS(IVYBRIDGE, X86_STEPPING_ANY, SRBDS),
VULNBL_INTEL_STEPPINGS(HASWELL, X86_STEPPING_ANY, SRBDS),
VULNBL_INTEL_STEPPINGS(HASWELL_L, X86_STEPPING_ANY, SRBDS),
VULNBL_INTEL_STEPPINGS(HASWELL_G, X86_STEPPING_ANY, SRBDS),
VULNBL_INTEL_STEPPINGS(HASWELL_X, BIT(2) | BIT(4), MMIO),
VULNBL_INTEL_STEPPINGS(BROADWELL_D, X86_STEPPINGS(0x3, 0x5), MMIO),
VULNBL_INTEL_STEPPINGS(HASWELL_X, X86_STEPPING_ANY, MMIO),
VULNBL_INTEL_STEPPINGS(BROADWELL_D, X86_STEPPING_ANY, MMIO),
VULNBL_INTEL_STEPPINGS(BROADWELL_G, X86_STEPPING_ANY, SRBDS),
VULNBL_INTEL_STEPPINGS(BROADWELL_X, X86_STEPPING_ANY, MMIO),
VULNBL_INTEL_STEPPINGS(BROADWELL, X86_STEPPING_ANY, SRBDS),
VULNBL_INTEL_STEPPINGS(SKYLAKE_L, X86_STEPPINGS(0x3, 0x3), SRBDS | MMIO),
VULNBL_INTEL_STEPPINGS(SKYLAKE_L, X86_STEPPING_ANY, SRBDS),
VULNBL_INTEL_STEPPINGS(SKYLAKE_X, BIT(3) | BIT(4) | BIT(6) |
BIT(7) | BIT(0xB), MMIO),
VULNBL_INTEL_STEPPINGS(SKYLAKE, X86_STEPPINGS(0x3, 0x3), SRBDS | MMIO),
VULNBL_INTEL_STEPPINGS(SKYLAKE, X86_STEPPING_ANY, SRBDS),
VULNBL_INTEL_STEPPINGS(KABYLAKE_L, X86_STEPPINGS(0x9, 0xC), SRBDS | MMIO),
VULNBL_INTEL_STEPPINGS(KABYLAKE_L, X86_STEPPINGS(0x0, 0x8), SRBDS),
VULNBL_INTEL_STEPPINGS(KABYLAKE, X86_STEPPINGS(0x9, 0xD), SRBDS | MMIO),
VULNBL_INTEL_STEPPINGS(KABYLAKE, X86_STEPPINGS(0x0, 0x8), SRBDS),
VULNBL_INTEL_STEPPINGS(ICELAKE_L, X86_STEPPINGS(0x5, 0x5), MMIO | MMIO_SBDS),
VULNBL_INTEL_STEPPINGS(ICELAKE_D, X86_STEPPINGS(0x1, 0x1), MMIO),
VULNBL_INTEL_STEPPINGS(ICELAKE_X, X86_STEPPINGS(0x4, 0x6), MMIO),
VULNBL_INTEL_STEPPINGS(COMETLAKE, BIT(2) | BIT(3) | BIT(5), MMIO | MMIO_SBDS),
VULNBL_INTEL_STEPPINGS(COMETLAKE_L, X86_STEPPINGS(0x1, 0x1), MMIO | MMIO_SBDS),
VULNBL_INTEL_STEPPINGS(COMETLAKE_L, X86_STEPPINGS(0x0, 0x0), MMIO),
VULNBL_INTEL_STEPPINGS(LAKEFIELD, X86_STEPPINGS(0x1, 0x1), MMIO | MMIO_SBDS),
VULNBL_INTEL_STEPPINGS(ROCKETLAKE, X86_STEPPINGS(0x1, 0x1), MMIO),
VULNBL_INTEL_STEPPINGS(ATOM_TREMONT, X86_STEPPINGS(0x1, 0x1), MMIO | MMIO_SBDS),
VULNBL_INTEL_STEPPINGS(SKYLAKE_L, X86_STEPPING_ANY, SRBDS | MMIO | RETBLEED),
VULNBL_INTEL_STEPPINGS(SKYLAKE_X, X86_STEPPING_ANY, MMIO | RETBLEED),
VULNBL_INTEL_STEPPINGS(SKYLAKE, X86_STEPPING_ANY, SRBDS | MMIO | RETBLEED),
VULNBL_INTEL_STEPPINGS(KABYLAKE_L, X86_STEPPING_ANY, SRBDS | MMIO | RETBLEED),
VULNBL_INTEL_STEPPINGS(KABYLAKE, X86_STEPPING_ANY, SRBDS | MMIO | RETBLEED),
VULNBL_INTEL_STEPPINGS(CANNONLAKE_L, X86_STEPPING_ANY, RETBLEED),
VULNBL_INTEL_STEPPINGS(ICELAKE_L, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED),
VULNBL_INTEL_STEPPINGS(ICELAKE_D, X86_STEPPING_ANY, MMIO),
VULNBL_INTEL_STEPPINGS(ICELAKE_X, X86_STEPPING_ANY, MMIO),
VULNBL_INTEL_STEPPINGS(COMETLAKE, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED),
VULNBL_INTEL_STEPPINGS(COMETLAKE_L, X86_STEPPINGS(0x0, 0x0), MMIO | RETBLEED),
VULNBL_INTEL_STEPPINGS(COMETLAKE_L, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED),
VULNBL_INTEL_STEPPINGS(LAKEFIELD, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED),
VULNBL_INTEL_STEPPINGS(ROCKETLAKE, X86_STEPPING_ANY, MMIO | RETBLEED),
VULNBL_INTEL_STEPPINGS(ATOM_TREMONT, X86_STEPPING_ANY, MMIO | MMIO_SBDS),
VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_D, X86_STEPPING_ANY, MMIO),
VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_L, X86_STEPPINGS(0x0, 0x0), MMIO | MMIO_SBDS),
VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_L, X86_STEPPING_ANY, MMIO | MMIO_SBDS),
VULNBL_AMD(0x15, RETBLEED),
VULNBL_AMD(0x16, RETBLEED),
VULNBL_AMD(0x17, RETBLEED),
VULNBL_HYGON(0x18, RETBLEED),
{}
};
@ -1251,6 +1263,11 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
setup_force_cpu_bug(X86_BUG_MMIO_UNKNOWN);
}
if (!cpu_has(c, X86_FEATURE_BTC_NO)) {
if (cpu_matches(cpu_vuln_blacklist, RETBLEED) || (ia32_cap & ARCH_CAP_RSBA))
setup_force_cpu_bug(X86_BUG_RETBLEED);
}
if (cpu_has(c, X86_FEATURE_IBRS_ENHANCED) &&
!cpu_matches(cpu_vuln_whitelist, NO_EIBRS_PBRSB) &&
!(ia32_cap & ARCH_CAP_PBRSB_NO))


@ -16,12 +16,17 @@
* respective wildcard entries.
*
* A typical table entry would be to match a specific CPU
* { X86_VENDOR_INTEL, 6, 0x12 }
* or to match a specific CPU feature
* { X86_FEATURE_MATCH(X86_FEATURE_FOOBAR) }
*
* X86_MATCH_VENDOR_FAM_MODEL_FEATURE(INTEL, 6, INTEL_FAM6_BROADWELL,
* X86_FEATURE_ANY, NULL);
*
* Fields can be wildcarded with %X86_VENDOR_ANY, %X86_FAMILY_ANY,
* %X86_MODEL_ANY, %X86_FEATURE_ANY or 0 (except for vendor)
* %X86_MODEL_ANY, %X86_FEATURE_ANY (except for vendor)
*
* asm/cpu_device_id.h contains a set of useful macros which are shortcuts
* for various common selections. The above can be shortened to:
*
* X86_MATCH_INTEL_FAM6_MODEL(BROADWELL, NULL);
*
* Arrays used to match for this should also be declared using
* MODULE_DEVICE_TABLE(x86cpu, ...)


@ -26,6 +26,7 @@ struct cpuid_bit {
static const struct cpuid_bit cpuid_bits[] = {
{ X86_FEATURE_APERFMPERF, CPUID_ECX, 0, 0x00000006, 0 },
{ X86_FEATURE_EPB, CPUID_ECX, 3, 0x00000006, 0 },
{ X86_FEATURE_RRSBA_CTRL, CPUID_EDX, 2, 0x00000007, 2 },
{ X86_FEATURE_CQM_LLC, CPUID_EDX, 1, 0x0000000f, 0 },
{ X86_FEATURE_CQM_OCCUP_LLC, CPUID_EDX, 0, 0x0000000f, 1 },
{ X86_FEATURE_CQM_MBM_TOTAL, CPUID_EDX, 1, 0x0000000f, 1 },


@ -449,7 +449,7 @@ static __always_inline void __speculation_ctrl_update(unsigned long tifp,
}
if (updmsr)
wrmsrl(MSR_IA32_SPEC_CTRL, msr);
write_spec_ctrl_current(msr, false);
}
static unsigned long speculation_ctrl_update_tif(struct task_struct *tsk)


@ -47,6 +47,7 @@
#include <asm/kvm_para.h>
#include <asm/irq_remapping.h>
#include <asm/spec-ctrl.h>
#include <asm/cpu_device_id.h>
#include <asm/virtext.h>
#include "trace.h"


@ -11,6 +11,7 @@
#include "mmu.h"
#include "nested.h"
#include "trace.h"
#include "vmx.h"
#include "x86.h"
static bool __read_mostly enable_shadow_vmcs = 1;
@ -2863,35 +2864,8 @@ static int nested_vmx_check_vmentry_hw(struct kvm_vcpu *vcpu)
vmx->loaded_vmcs->host_state.cr4 = cr4;
}
asm(
"sub $%c[wordsize], %%" _ASM_SP "\n\t" /* temporarily adjust RSP for CALL */
"cmp %%" _ASM_SP ", %c[host_state_rsp](%[loaded_vmcs]) \n\t"
"je 1f \n\t"
__ex("vmwrite %%" _ASM_SP ", %[HOST_RSP]") "\n\t"
"mov %%" _ASM_SP ", %c[host_state_rsp](%[loaded_vmcs]) \n\t"
"1: \n\t"
"add $%c[wordsize], %%" _ASM_SP "\n\t" /* un-adjust RSP */
/* Check if vmlaunch or vmresume is needed */
"cmpb $0, %c[launched](%[loaded_vmcs])\n\t"
/*
* VMLAUNCH and VMRESUME clear RFLAGS.{CF,ZF} on VM-Exit, set
* RFLAGS.CF on VM-Fail Invalid and set RFLAGS.ZF on VM-Fail
* Valid. vmx_vmenter() directly "returns" RFLAGS, and so the
* results of VM-Enter is captured via CC_{SET,OUT} to vm_fail.
*/
"call vmx_vmenter\n\t"
CC_SET(be)
: ASM_CALL_CONSTRAINT, CC_OUT(be) (vm_fail)
: [HOST_RSP]"r"((unsigned long)HOST_RSP),
[loaded_vmcs]"r"(vmx->loaded_vmcs),
[launched]"i"(offsetof(struct loaded_vmcs, launched)),
[host_state_rsp]"i"(offsetof(struct loaded_vmcs, host_state.rsp)),
[wordsize]"i"(sizeof(ulong))
: "memory"
);
vm_fail = __vmx_vcpu_run(vmx, (unsigned long *)&vcpu->arch.regs,
__vmx_vcpu_run_flags(vmx));
if (vmx->msr_autoload.host.nr)
vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.host.nr);


@ -0,0 +1,8 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __KVM_X86_VMX_RUN_FLAGS_H
#define __KVM_X86_VMX_RUN_FLAGS_H
#define VMX_RUN_VMRESUME (1 << 0)
#define VMX_RUN_SAVE_SPEC_CTRL (1 << 1)
#endif /* __KVM_X86_VMX_RUN_FLAGS_H */
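
The header only defines the bit values; going by the comments in vmenter.S further down, a caller assembles the flags word roughly like this (sketch only; the helper name and the SPEC_CTRL predicate are illustrative, not code from this merge):

static unsigned int vmx_run_flags_sketch(struct vcpu_vmx *vmx)
{
	unsigned int flags = 0;

	if (vmx->loaded_vmcs->launched)			/* use VMRESUME instead of VMLAUNCH */
		flags |= VMX_RUN_VMRESUME;

	if (guest_owns_spec_ctrl(vmx))			/* hypothetical predicate */
		flags |= VMX_RUN_SAVE_SPEC_CTRL;	/* save guest SPEC_CTRL after VM-Exit */

	return flags;
}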


@ -4,6 +4,7 @@
#include <asm/bitsperlong.h>
#include <asm/kvm_vcpu_regs.h>
#include <asm/nospec-branch.h>
#include "run_flags.h"
#define WORD_SIZE (BITS_PER_LONG / 8)
@ -29,78 +30,12 @@
.text
/**
* vmx_vmenter - VM-Enter the current loaded VMCS
*
* %RFLAGS.ZF: !VMCS.LAUNCHED, i.e. controls VMLAUNCH vs. VMRESUME
*
* Returns:
* %RFLAGS.CF is set on VM-Fail Invalid
* %RFLAGS.ZF is set on VM-Fail Valid
* %RFLAGS.{CF,ZF} are cleared on VM-Success, i.e. VM-Exit
*
* Note that VMRESUME/VMLAUNCH fall-through and return directly if
* they VM-Fail, whereas a successful VM-Enter + VM-Exit will jump
* to vmx_vmexit.
*/
ENTRY(vmx_vmenter)
/* EFLAGS.ZF is set if VMCS.LAUNCHED == 0 */
je 2f
1: vmresume
ret
2: vmlaunch
ret
3: cmpb $0, kvm_rebooting
je 4f
ret
4: ud2
.pushsection .fixup, "ax"
5: jmp 3b
.popsection
_ASM_EXTABLE(1b, 5b)
_ASM_EXTABLE(2b, 5b)
ENDPROC(vmx_vmenter)
/**
* vmx_vmexit - Handle a VMX VM-Exit
*
* Returns:
* %RFLAGS.{CF,ZF} are cleared on VM-Success, i.e. VM-Exit
*
* This is vmx_vmenter's partner in crime. On a VM-Exit, control will jump
* here after hardware loads the host's state, i.e. this is the destination
* referred to by VMCS.HOST_RIP.
*/
ENTRY(vmx_vmexit)
#ifdef CONFIG_RETPOLINE
ALTERNATIVE "jmp .Lvmexit_skip_rsb", "", X86_FEATURE_RETPOLINE
/* Preserve guest's RAX, it's used to stuff the RSB. */
push %_ASM_AX
/* IMPORTANT: Stuff the RSB immediately after VM-Exit, before RET! */
FILL_RETURN_BUFFER %_ASM_AX, RSB_CLEAR_LOOPS, X86_FEATURE_RETPOLINE
/* Clear RFLAGS.CF and RFLAGS.ZF to preserve VM-Exit, i.e. !VM-Fail. */
or $1, %_ASM_AX
pop %_ASM_AX
.Lvmexit_skip_rsb:
#endif
ISSUE_UNBALANCED_RET_GUARD X86_FEATURE_RSB_VMEXIT_LITE
ret
ENDPROC(vmx_vmexit)
/**
* __vmx_vcpu_run - Run a vCPU via a transition to VMX guest mode
* @vmx: struct vcpu_vmx * (forwarded to vmx_update_host_rsp)
* @vmx: struct vcpu_vmx *
* @regs: unsigned long * (to guest registers)
* @launched: %true if the VMCS has been launched
* @flags: VMX_RUN_VMRESUME: use VMRESUME instead of VMLAUNCH
* VMX_RUN_SAVE_SPEC_CTRL: save guest SPEC_CTRL into vmx->spec_ctrl
*
* Returns:
* 0 on VM-Exit, 1 on VM-Fail
@ -119,24 +54,29 @@ ENTRY(__vmx_vcpu_run)
#endif
push %_ASM_BX
/* Save @vmx for SPEC_CTRL handling */
push %_ASM_ARG1
/* Save @flags for SPEC_CTRL handling */
push %_ASM_ARG3
/*
* Save @regs, _ASM_ARG2 may be modified by vmx_update_host_rsp() and
* @regs is needed after VM-Exit to save the guest's register values.
*/
push %_ASM_ARG2
/* Copy @launched to BL, _ASM_ARG3 is volatile. */
/* Copy @flags to BL, _ASM_ARG3 is volatile. */
mov %_ASM_ARG3B, %bl
/* Adjust RSP to account for the CALL to vmx_vmenter(). */
lea -WORD_SIZE(%_ASM_SP), %_ASM_ARG2
lea (%_ASM_SP), %_ASM_ARG2
call vmx_update_host_rsp
/* Load @regs to RAX. */
mov (%_ASM_SP), %_ASM_AX
/* Check if vmlaunch or vmresume is needed */
cmpb $0, %bl
testb $VMX_RUN_VMRESUME, %bl
/* Load guest registers. Don't clobber flags. */
mov VCPU_RBX(%_ASM_AX), %_ASM_BX
@ -158,11 +98,25 @@ ENTRY(__vmx_vcpu_run)
/* Load guest RAX. This kills the @regs pointer! */
mov VCPU_RAX(%_ASM_AX), %_ASM_AX
/* Enter guest mode */
call vmx_vmenter
/* Check EFLAGS.ZF from 'testb' above */
jz .Lvmlaunch
/* Jump on VM-Fail. */
jbe 2f
/*
* If VMRESUME/VMLAUNCH and corresponding vmexit succeed, execution resumes at
* the 'vmx_vmexit' label below.
*/
.Lvmresume:
vmresume
jmp .Lvmfail
.Lvmlaunch:
vmlaunch
jmp .Lvmfail
_ASM_EXTABLE(.Lvmresume, .Lfixup)
_ASM_EXTABLE(.Lvmlaunch, .Lfixup)
SYM_INNER_LABEL(vmx_vmexit, SYM_L_GLOBAL)
/* Temporarily save guest's RAX. */
push %_ASM_AX
@ -189,19 +143,21 @@ ENTRY(__vmx_vcpu_run)
mov %r15, VCPU_R15(%_ASM_AX)
#endif
/* Clear RAX to indicate VM-Exit (as opposed to VM-Fail). */
xor %eax, %eax
/* Clear return value to indicate VM-Exit (as opposed to VM-Fail). */
xor %ebx, %ebx
.Lclear_regs:
/*
* Clear all general purpose registers except RSP and RAX to prevent
* Clear all general purpose registers except RSP and RBX to prevent
* speculative use of the guest's values, even those that are reloaded
* via the stack. In theory, an L1 cache miss when restoring registers
* could lead to speculative execution with the guest's values.
* Zeroing XORs are dirt cheap, i.e. the extra paranoia is essentially
* free. RSP and RAX are exempt as RSP is restored by hardware during
* VM-Exit and RAX is explicitly loaded with 0 or 1 to return VM-Fail.
* VM-Exit and RBX is explicitly loaded with 0 or 1 to hold the return
* value.
*/
1: xor %ebx, %ebx
xor %eax, %eax
xor %ecx, %ecx
xor %edx, %edx
xor %esi, %esi
@ -220,8 +176,32 @@ ENTRY(__vmx_vcpu_run)
/* "POP" @regs. */
add $WORD_SIZE, %_ASM_SP
pop %_ASM_BX
/*
* IMPORTANT: RSB filling and SPEC_CTRL handling must be done before
* the first unbalanced RET after vmexit!
*
* For retpoline or IBRS, RSB filling is needed to prevent poisoned RSB
* entries and (in some cases) RSB underflow.
*
* eIBRS has its own protection against poisoned RSB, so it doesn't
* need the RSB filling sequence. But it does need to be enabled, and a
* single call to retire, before the first unbalanced RET.
*/
FILL_RETURN_BUFFER %_ASM_CX, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_VMEXIT,\
X86_FEATURE_RSB_VMEXIT_LITE
pop %_ASM_ARG2 /* @flags */
pop %_ASM_ARG1 /* @vmx */
call vmx_spec_ctrl_restore_host
/* Put return value in AX */
mov %_ASM_BX, %_ASM_AX
pop %_ASM_BX
#ifdef CONFIG_X86_64
pop %r12
pop %r13
@ -234,11 +214,20 @@ ENTRY(__vmx_vcpu_run)
pop %_ASM_BP
ret
/* VM-Fail. Out-of-line to avoid a taken Jcc after VM-Exit. */
2: mov $1, %eax
jmp 1b
.Lfixup:
cmpb $0, kvm_rebooting
jne .Lvmfail
ud2
.Lvmfail:
/* VM-Fail: set return value to 1 */
mov $1, %_ASM_BX
jmp .Lclear_regs
ENDPROC(__vmx_vcpu_run)
.section .text, "ax"
/**
* vmread_error_trampoline - Trampoline from inline asm to vmread_error()
* @field: VMCS field encoding that failed


@ -31,6 +31,7 @@
#include <asm/apic.h>
#include <asm/asm.h>
#include <asm/cpu.h>
#include <asm/cpu_device_id.h>
#include <asm/debugreg.h>
#include <asm/desc.h>
#include <asm/fpu/internal.h>
@ -358,9 +359,9 @@ static __always_inline void vmx_disable_fb_clear(struct vcpu_vmx *vmx)
if (!vmx->disable_fb_clear)
return;
rdmsrl(MSR_IA32_MCU_OPT_CTRL, msr);
msr = __rdmsr(MSR_IA32_MCU_OPT_CTRL);
msr |= FB_CLEAR_DIS;
wrmsrl(MSR_IA32_MCU_OPT_CTRL, msr);
native_wrmsrl(MSR_IA32_MCU_OPT_CTRL, msr);
/* Cache the MSR value to avoid reading it later */
vmx->msr_ia32_mcu_opt_ctrl = msr;
}
@ -371,7 +372,7 @@ static __always_inline void vmx_enable_fb_clear(struct vcpu_vmx *vmx)
return;
vmx->msr_ia32_mcu_opt_ctrl &= ~FB_CLEAR_DIS;
wrmsrl(MSR_IA32_MCU_OPT_CTRL, vmx->msr_ia32_mcu_opt_ctrl);
native_wrmsrl(MSR_IA32_MCU_OPT_CTRL, vmx->msr_ia32_mcu_opt_ctrl);
}
static void vmx_update_fb_clear_dis(struct kvm_vcpu *vcpu, struct vcpu_vmx *vmx)
@ -862,6 +863,24 @@ static bool msr_write_intercepted(struct vcpu_vmx *vmx, u32 msr)
return true;
}
unsigned int __vmx_vcpu_run_flags(struct vcpu_vmx *vmx)
{
unsigned int flags = 0;
if (vmx->loaded_vmcs->launched)
flags |= VMX_RUN_VMRESUME;
/*
* If writes to the SPEC_CTRL MSR aren't intercepted, the guest is free
* to change it directly without causing a vmexit. In that case read
* it after vmexit and store it in vmx->spec_ctrl.
*/
if (unlikely(!msr_write_intercepted(vmx, MSR_IA32_SPEC_CTRL)))
flags |= VMX_RUN_SAVE_SPEC_CTRL;
return flags;
}
static void clear_atomic_switch_msr_special(struct vcpu_vmx *vmx,
unsigned long entry, unsigned long exit)
{
@ -6539,7 +6558,30 @@ void vmx_update_host_rsp(struct vcpu_vmx *vmx, unsigned long host_rsp)
}
}
bool __vmx_vcpu_run(struct vcpu_vmx *vmx, unsigned long *regs, bool launched);
void noinstr vmx_spec_ctrl_restore_host(struct vcpu_vmx *vmx,
unsigned int flags)
{
u64 hostval = this_cpu_read(x86_spec_ctrl_current);
if (!cpu_feature_enabled(X86_FEATURE_MSR_SPEC_CTRL))
return;
if (flags & VMX_RUN_SAVE_SPEC_CTRL)
vmx->spec_ctrl = __rdmsr(MSR_IA32_SPEC_CTRL);
/*
* If the guest/host SPEC_CTRL values differ, restore the host value.
*
* For legacy IBRS, the IBRS bit always needs to be written after
* transitioning from a less privileged predictor mode, regardless of
* whether the guest/host values differ.
*/
if (cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS) ||
vmx->spec_ctrl != hostval)
native_wrmsrl(MSR_IA32_SPEC_CTRL, hostval);
barrier_nospec();
}
static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
{
@ -6628,32 +6670,12 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
write_cr2(vcpu->arch.cr2);
vmx->fail = __vmx_vcpu_run(vmx, (unsigned long *)&vcpu->arch.regs,
vmx->loaded_vmcs->launched);
__vmx_vcpu_run_flags(vmx));
vcpu->arch.cr2 = read_cr2();
vmx_enable_fb_clear(vmx);
/*
* We do not use IBRS in the kernel. If this vCPU has used the
* SPEC_CTRL MSR it may have left it on; save the value and
* turn it off. This is much more efficient than blindly adding
* it to the atomic save/restore list. Especially as the former
* (Saving guest MSRs on vmexit) doesn't even exist in KVM.
*
* For non-nested case:
* If the L01 MSR bitmap does not intercept the MSR, then we need to
* save it.
*
* For nested case:
* If the L02 MSR bitmap does not intercept the MSR, then we need to
* save it.
*/
if (unlikely(!msr_write_intercepted(vmx, MSR_IA32_SPEC_CTRL)))
vmx->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL);
x86_spec_ctrl_restore_host(vmx->spec_ctrl, 0);
/* All fields are clean at this point */
if (static_branch_unlikely(&enable_evmcs))
current_evmcs->hv_clean_fields |=


@ -10,6 +10,7 @@
#include "capabilities.h"
#include "ops.h"
#include "vmcs.h"
#include "run_flags.h"
extern const u32 vmx_msr_index[];
extern u64 host_efer;
@ -336,6 +337,10 @@ void vmx_set_virtual_apic_mode(struct kvm_vcpu *vcpu);
struct shared_msr_entry *find_msr_entry(struct vcpu_vmx *vmx, u32 msr);
void pt_update_intercept_for_msr(struct vcpu_vmx *vmx);
void vmx_update_host_rsp(struct vcpu_vmx *vmx, unsigned long host_rsp);
void vmx_spec_ctrl_restore_host(struct vcpu_vmx *vmx, unsigned int flags);
unsigned int __vmx_vcpu_run_flags(struct vcpu_vmx *vmx);
bool __vmx_vcpu_run(struct vcpu_vmx *vmx, unsigned long *regs,
unsigned int flags);
#define POSTED_INTR_ON 0
#define POSTED_INTR_SN 1


@ -10329,9 +10329,9 @@ void kvm_arch_end_assignment(struct kvm *kvm)
}
EXPORT_SYMBOL_GPL(kvm_arch_end_assignment);
bool kvm_arch_has_assigned_device(struct kvm *kvm)
bool noinstr kvm_arch_has_assigned_device(struct kvm *kvm)
{
return atomic_read(&kvm->arch.assigned_device_count);
return arch_atomic_read(&kvm->arch.assigned_device_count);
}
EXPORT_SYMBOL_GPL(kvm_arch_has_assigned_device);


@ -574,6 +574,12 @@ ssize_t __weak cpu_show_mmio_stale_data(struct device *dev,
return sysfs_emit(buf, "Not affected\n");
}
ssize_t __weak cpu_show_retbleed(struct device *dev,
struct device_attribute *attr, char *buf)
{
return sysfs_emit(buf, "Not affected\n");
}
static DEVICE_ATTR(meltdown, 0444, cpu_show_meltdown, NULL);
static DEVICE_ATTR(spectre_v1, 0444, cpu_show_spectre_v1, NULL);
static DEVICE_ATTR(spectre_v2, 0444, cpu_show_spectre_v2, NULL);
@ -584,6 +590,7 @@ static DEVICE_ATTR(tsx_async_abort, 0444, cpu_show_tsx_async_abort, NULL);
static DEVICE_ATTR(itlb_multihit, 0444, cpu_show_itlb_multihit, NULL);
static DEVICE_ATTR(srbds, 0444, cpu_show_srbds, NULL);
static DEVICE_ATTR(mmio_stale_data, 0444, cpu_show_mmio_stale_data, NULL);
static DEVICE_ATTR(retbleed, 0444, cpu_show_retbleed, NULL);
static struct attribute *cpu_root_vulnerabilities_attrs[] = {
&dev_attr_meltdown.attr,
@ -596,6 +603,7 @@ static struct attribute *cpu_root_vulnerabilities_attrs[] = {
&dev_attr_itlb_multihit.attr,
&dev_attr_srbds.attr,
&dev_attr_mmio_stale_data.attr,
&dev_attr_retbleed.attr,
NULL
};


@ -30,6 +30,7 @@
#include <asm/msr.h>
#include <asm/processor.h>
#include <asm/cpufeature.h>
#include <asm/cpu_device_id.h>
MODULE_AUTHOR("Paul Diefenbaugh, Dominik Brodowski");
MODULE_DESCRIPTION("ACPI Processor P-States Driver");


@ -18,6 +18,7 @@
#include <asm/msr.h>
#include <asm/cpufeature.h>
#include <asm/cpu_device_id.h>
#include "cpufreq_ondemand.h"


@ -35,7 +35,6 @@
#include <linux/pci.h>
#include <linux/pm_runtime.h>
#include <drm/drm_crtc_helper.h>
#include <drm/drm_damage_helper.h>
#include <drm/drm_edid.h>
#include <drm/drm_gem_framebuffer_helper.h>
#include <drm/drm_fb_helper.h>
@ -496,7 +495,6 @@ bool amdgpu_display_ddc_probe(struct amdgpu_connector *amdgpu_connector,
static const struct drm_framebuffer_funcs amdgpu_fb_funcs = {
.destroy = drm_gem_fb_destroy,
.create_handle = drm_gem_fb_create_handle,
.dirty = drm_atomic_helper_dirtyfb,
};
uint32_t amdgpu_display_supported_domains(struct amdgpu_device *adev,


@ -46,11 +46,13 @@
#include <linux/tick.h>
#include <trace/events/power.h>
#include <linux/sched.h>
#include <linux/sched/smt.h>
#include <linux/notifier.h>
#include <linux/cpu.h>
#include <linux/moduleparam.h>
#include <asm/cpu_device_id.h>
#include <asm/intel-family.h>
#include <asm/nospec-branch.h>
#include <asm/mwait.h>
#include <asm/msr.h>
@ -97,6 +99,12 @@ static struct cpuidle_state *cpuidle_state_table;
*/
#define CPUIDLE_FLAG_TLB_FLUSHED 0x10000
/*
* Disable IBRS across idle (when KERNEL_IBRS), is exclusive vs IRQ_ENABLE
* above.
*/
#define CPUIDLE_FLAG_IBRS BIT(16)
/*
* MWAIT takes an 8-bit "hint" in EAX "suggesting"
* the C-state (top nibble) and sub-state (bottom nibble)
@ -107,6 +115,24 @@ static struct cpuidle_state *cpuidle_state_table;
#define flg2MWAIT(flags) (((flags) >> 24) & 0xFF)
#define MWAIT2flg(eax) ((eax & 0xFF) << 24)
static __cpuidle int intel_idle_ibrs(struct cpuidle_device *dev,
struct cpuidle_driver *drv, int index)
{
bool smt_active = sched_smt_active();
u64 spec_ctrl = spec_ctrl_current();
int ret;
if (smt_active)
wrmsrl(MSR_IA32_SPEC_CTRL, 0);
ret = intel_idle(dev, drv, index);
if (smt_active)
wrmsrl(MSR_IA32_SPEC_CTRL, spec_ctrl);
return ret;
}
/*
* States are indexed by the cstate number,
* which is also the index into the MWAIT hint array.
@ -605,7 +631,7 @@ static struct cpuidle_state skl_cstates[] = {
{
.name = "C6",
.desc = "MWAIT 0x20",
.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
.exit_latency = 85,
.target_residency = 200,
.enter = &intel_idle,
@ -613,7 +639,7 @@ static struct cpuidle_state skl_cstates[] = {
{
.name = "C7s",
.desc = "MWAIT 0x33",
.flags = MWAIT2flg(0x33) | CPUIDLE_FLAG_TLB_FLUSHED,
.flags = MWAIT2flg(0x33) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
.exit_latency = 124,
.target_residency = 800,
.enter = &intel_idle,
@ -621,7 +647,7 @@ static struct cpuidle_state skl_cstates[] = {
{
.name = "C8",
.desc = "MWAIT 0x40",
.flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
.flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
.exit_latency = 200,
.target_residency = 800,
.enter = &intel_idle,
@ -629,7 +655,7 @@ static struct cpuidle_state skl_cstates[] = {
{
.name = "C9",
.desc = "MWAIT 0x50",
.flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
.flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
.exit_latency = 480,
.target_residency = 5000,
.enter = &intel_idle,
@ -637,7 +663,7 @@ static struct cpuidle_state skl_cstates[] = {
{
.name = "C10",
.desc = "MWAIT 0x60",
.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
.exit_latency = 890,
.target_residency = 5000,
.enter = &intel_idle,
@ -666,7 +692,7 @@ static struct cpuidle_state skx_cstates[] = {
{
.name = "C6",
.desc = "MWAIT 0x20",
.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
.exit_latency = 133,
.target_residency = 600,
.enter = &intel_idle,
@ -1372,6 +1398,11 @@ static void __init intel_idle_cpuidle_driver_init(void)
drv->states[drv->state_count] = /* structure copy */
cpuidle_state_table[cstate];
if (cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS) &&
cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_IBRS) {
drv->states[drv->state_count].enter = intel_idle_ibrs;
}
drv->state_count += 1;
}


@ -1007,7 +1007,7 @@ restart:
* The INCOMPLETE flag means that we will find the "old"
* attr, not the "new" one.
*/
args->flags |= XFS_ATTR_INCOMPLETE;
args->op_flags |= XFS_DA_OP_INCOMPLETE;
state = xfs_da_state_alloc();
state->args = args;
state->mp = mp;


@ -2345,8 +2345,8 @@ xfs_attr3_leaf_lookup_int(
* If we are looking for INCOMPLETE entries, show only those.
* If we are looking for complete entries, show only those.
*/
if ((args->flags & XFS_ATTR_INCOMPLETE) !=
(entry->flags & XFS_ATTR_INCOMPLETE)) {
if (!!(args->op_flags & XFS_DA_OP_INCOMPLETE) !=
!!(entry->flags & XFS_ATTR_INCOMPLETE)) {
continue;
}
if (entry->flags & XFS_ATTR_LOCAL) {


@ -17,13 +17,27 @@ struct xfs_inode;
struct xfs_trans;
/*
* Used to keep a list of "remote value" extents when unlinking an inode.
* Incore version of the attribute leaf header.
*/
typedef struct xfs_attr_inactive_list {
xfs_dablk_t valueblk; /* block number of value bytes */
int valuelen; /* number of bytes in value */
} xfs_attr_inactive_list_t;
struct xfs_attr3_icleaf_hdr {
uint32_t forw;
uint32_t back;
uint16_t magic;
uint16_t count;
uint16_t usedbytes;
/*
* Firstused is 32-bit here instead of 16-bit like the on-disk variant
* to support maximum fsb size of 64k without overflow issues throughout
* the attr code. Instead, the overflow condition is handled on
* conversion to/from disk.
*/
uint32_t firstused;
__u8 holes;
struct {
uint16_t base;
uint16_t size;
} freemap[XFS_ATTR_LEAF_MAPSIZE];
};
/*========================================================================
* Function prototypes for the kernel.


@ -24,6 +24,23 @@
#define ATTR_RMTVALUE_MAPSIZE 1 /* # of map entries at once */
/*
* Remote Attribute Values
* =======================
*
* Remote extended attribute values are conceptually simple -- they're written
* to data blocks mapped by an inode's attribute fork, and they have an upper
* size limit of 64k. Setting a value does not involve the XFS log.
*
* However, on a v5 filesystem, maximally sized remote attr values require one
* block more than 64k worth of space to hold both the remote attribute value and
* its header (64 bytes). On a 4k block filesystem this results in a 68k buffer;
* on a 64k block filesystem, this would be a 128k buffer. Note that the log
* format can only handle a dirty buffer of XFS_MAX_BLOCKSIZE length (64k).
* Therefore, we /must/ ensure that remote attribute value buffers never touch
* the logging system and therefore never have a log item.
*/
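To make the 68k/128k numbers above concrete, here is a small stand-alone sketch (editor's illustration, assuming the 64-byte per-block value header described in the comment; the names RMT_HDR_SIZE and rmt_buffer_bytes are invented for this example) that reproduces them:

#include <stdio.h>

#define RMT_HDR_SIZE	64u		/* assumed per-block remote value header */
#define MAX_ATTR_VALUE	(64u * 1024)	/* 64k upper size limit on a value */

static unsigned int rmt_buffer_bytes(unsigned int blocksize)
{
	unsigned int usable = blocksize - RMT_HDR_SIZE;
	unsigned int blocks = (MAX_ATTR_VALUE + usable - 1) / usable;

	return blocks * blocksize;	/* total buffer size covering the value */
}

int main(void)
{
	printf("4k blocks:  %uk buffer\n", rmt_buffer_bytes(4096) / 1024);	/* 68k */
	printf("64k blocks: %uk buffer\n", rmt_buffer_bytes(65536) / 1024);	/* 128k */
	return 0;
}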
/*
* Each contiguous block has a header, so it is not just a simple attribute
* length to FSB conversion.
@ -400,17 +417,25 @@ xfs_attr_rmtval_get(
(map[i].br_startblock != HOLESTARTBLOCK));
dblkno = XFS_FSB_TO_DADDR(mp, map[i].br_startblock);
dblkcnt = XFS_FSB_TO_BB(mp, map[i].br_blockcount);
error = xfs_trans_read_buf(mp, args->trans,
mp->m_ddev_targp,
dblkno, dblkcnt, 0, &bp,
&xfs_attr3_rmt_buf_ops);
if (error)
bp = xfs_buf_read(mp->m_ddev_targp, dblkno, dblkcnt, 0,
&xfs_attr3_rmt_buf_ops);
if (!bp)
return -ENOMEM;
error = bp->b_error;
if (error) {
xfs_buf_ioerror_alert(bp, __func__);
xfs_buf_relse(bp);
/* bad CRC means corrupted metadata */
if (error == -EFSBADCRC)
error = -EFSCORRUPTED;
return error;
}
error = xfs_attr_rmtval_copyout(mp, bp, args->dp->i_ino,
&offset, &valuelen,
&dst);
xfs_trans_brelse(args->trans, bp);
xfs_buf_relse(bp);
if (error)
return error;
@ -551,6 +576,32 @@ xfs_attr_rmtval_set(
return 0;
}
/* Mark stale any incore buffers for the remote value. */
int
xfs_attr_rmtval_stale(
struct xfs_inode *ip,
struct xfs_bmbt_irec *map,
xfs_buf_flags_t incore_flags)
{
struct xfs_mount *mp = ip->i_mount;
struct xfs_buf *bp;
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
ASSERT((map->br_startblock != DELAYSTARTBLOCK) &&
(map->br_startblock != HOLESTARTBLOCK));
bp = xfs_buf_incore(mp->m_ddev_targp,
XFS_FSB_TO_DADDR(mp, map->br_startblock),
XFS_FSB_TO_BB(mp, map->br_blockcount), incore_flags);
if (bp) {
xfs_buf_stale(bp);
xfs_buf_relse(bp);
}
return 0;
}
/*
* Remove the value associated with an attribute by deleting the
* out-of-line buffer that it is stored on.
@ -559,7 +610,6 @@ int
xfs_attr_rmtval_remove(
struct xfs_da_args *args)
{
struct xfs_mount *mp = args->dp->i_mount;
xfs_dablk_t lblkno;
int blkcnt;
int error;
@ -574,9 +624,6 @@ xfs_attr_rmtval_remove(
blkcnt = args->rmtblkcnt;
while (blkcnt > 0) {
struct xfs_bmbt_irec map;
struct xfs_buf *bp;
xfs_daddr_t dblkno;
int dblkcnt;
int nmap;
/*
@ -588,21 +635,9 @@ xfs_attr_rmtval_remove(
if (error)
return error;
ASSERT(nmap == 1);
ASSERT((map.br_startblock != DELAYSTARTBLOCK) &&
(map.br_startblock != HOLESTARTBLOCK));
dblkno = XFS_FSB_TO_DADDR(mp, map.br_startblock),
dblkcnt = XFS_FSB_TO_BB(mp, map.br_blockcount);
/*
* If the "remote" value is in the cache, remove it.
*/
bp = xfs_buf_incore(mp->m_ddev_targp, dblkno, dblkcnt, XBF_TRYLOCK);
if (bp) {
xfs_buf_stale(bp);
xfs_buf_relse(bp);
bp = NULL;
}
error = xfs_attr_rmtval_stale(args->dp, &map, XBF_TRYLOCK);
if (error)
return error;
lblkno += map.br_blockcount;
blkcnt -= map.br_blockcount;


@ -11,5 +11,7 @@ int xfs_attr3_rmt_blocks(struct xfs_mount *mp, int attrlen);
int xfs_attr_rmtval_get(struct xfs_da_args *args);
int xfs_attr_rmtval_set(struct xfs_da_args *args);
int xfs_attr_rmtval_remove(struct xfs_da_args *args);
int xfs_attr_rmtval_stale(struct xfs_inode *ip, struct xfs_bmbt_irec *map,
xfs_buf_flags_t incore_flags);
#endif /* __XFS_ATTR_REMOTE_H__ */


@ -82,6 +82,7 @@ typedef struct xfs_da_args {
#define XFS_DA_OP_OKNOENT 0x0008 /* lookup/add op, ENOENT ok, else die */
#define XFS_DA_OP_CILOOKUP 0x0010 /* lookup to return CI name if found */
#define XFS_DA_OP_ALLOCVAL 0x0020 /* lookup to alloc buffer if found */
#define XFS_DA_OP_INCOMPLETE 0x0040 /* lookup INCOMPLETE attr keys */
#define XFS_DA_OP_FLAGS \
{ XFS_DA_OP_JUSTCHECK, "JUSTCHECK" }, \
@ -89,7 +90,8 @@ typedef struct xfs_da_args {
{ XFS_DA_OP_ADDNAME, "ADDNAME" }, \
{ XFS_DA_OP_OKNOENT, "OKNOENT" }, \
{ XFS_DA_OP_CILOOKUP, "CILOOKUP" }, \
{ XFS_DA_OP_ALLOCVAL, "ALLOCVAL" }
{ XFS_DA_OP_ALLOCVAL, "ALLOCVAL" }, \
{ XFS_DA_OP_INCOMPLETE, "INCOMPLETE" }
/*
* Storage for holding state during Btree searches and split/join ops.
@ -124,6 +126,19 @@ typedef struct xfs_da_state {
/* for dirv2 extrablk is data */
} xfs_da_state_t;
/*
* In-core version of the node header to abstract the differences in the v2 and
* v3 disk format of the headers. Callers need to convert to/from disk format as
* appropriate.
*/
struct xfs_da3_icnode_hdr {
uint32_t forw;
uint32_t back;
uint16_t magic;
uint16_t count;
uint16_t level;
};
/*
* Utility macros to aid in logging changed structure fields.
*/


@ -13,6 +13,7 @@
#include "xfs_mount.h"
#include "xfs_inode.h"
#include "xfs_dir2.h"
#include "xfs_dir2_priv.h"
/*
* Shortform directory ops


@ -93,19 +93,6 @@ struct xfs_da3_intnode {
struct xfs_da_node_entry __btree[];
};
/*
* In-core version of the node header to abstract the differences in the v2 and
* v3 disk format of the headers. Callers need to convert to/from disk format as
* appropriate.
*/
struct xfs_da3_icnode_hdr {
uint32_t forw;
uint32_t back;
uint16_t magic;
uint16_t count;
uint16_t level;
};
/*
* Directory version 2.
*
@ -434,14 +421,6 @@ struct xfs_dir3_leaf_hdr {
__be32 pad; /* 64 bit alignment */
};
struct xfs_dir3_icleaf_hdr {
uint32_t forw;
uint32_t back;
uint16_t magic;
uint16_t count;
uint16_t stale;
};
/*
* Leaf block entry.
*/
@ -520,19 +499,6 @@ struct xfs_dir3_free {
#define XFS_DIR3_FREE_CRC_OFF offsetof(struct xfs_dir3_free, hdr.hdr.crc)
/*
* In core version of the free block header, abstracted away from on-disk format
* differences. Use this in the code, and convert to/from the disk version using
* xfs_dir3_free_hdr_from_disk/xfs_dir3_free_hdr_to_disk.
*/
struct xfs_dir3_icfree_hdr {
uint32_t magic;
uint32_t firstdb;
uint32_t nvalid;
uint32_t nused;
};
/*
* Single block format.
*
@ -709,29 +675,6 @@ struct xfs_attr3_leafblock {
*/
};
/*
* incore, neutral version of the attribute leaf header
*/
struct xfs_attr3_icleaf_hdr {
uint32_t forw;
uint32_t back;
uint16_t magic;
uint16_t count;
uint16_t usedbytes;
/*
* firstused is 32-bit here instead of 16-bit like the on-disk variant
* to support maximum fsb size of 64k without overflow issues throughout
* the attr code. Instead, the overflow condition is handled on
* conversion to/from disk.
*/
uint32_t firstused;
__u8 holes;
struct {
uint16_t base;
uint16_t size;
} freemap[XFS_ATTR_LEAF_MAPSIZE];
};
/*
* Special value to represent fs block size in the leaf header firstused field.
* Only used when block size overflows the 2-bytes available on disk.
@ -740,8 +683,6 @@ struct xfs_attr3_icleaf_hdr {
/*
* Flags used in the leaf_entry[i].flags field.
* NOTE: the INCOMPLETE bit must not collide with the flags bits specified
* on the system call, they are "or"ed together for various operations.
*/
#define XFS_ATTR_LOCAL_BIT 0 /* attr is stored locally */
#define XFS_ATTR_ROOT_BIT 1 /* limit access to trusted attrs */


@ -18,6 +18,8 @@ struct xfs_dir2_sf_entry;
struct xfs_dir2_data_hdr;
struct xfs_dir2_data_entry;
struct xfs_dir2_data_unused;
struct xfs_dir3_icfree_hdr;
struct xfs_dir3_icleaf_hdr;
extern struct xfs_name xfs_name_dotdot;


@ -8,6 +8,25 @@
struct dir_context;
/*
* In-core version of the leaf and free block headers to abstract the
* differences in the v2 and v3 disk format of the headers.
*/
struct xfs_dir3_icleaf_hdr {
uint32_t forw;
uint32_t back;
uint16_t magic;
uint16_t count;
uint16_t stale;
};
struct xfs_dir3_icfree_hdr {
uint32_t magic;
uint32_t firstdb;
uint32_t nvalid;
uint32_t nused;
};
/* xfs_dir2.c */
extern int xfs_dir2_grow_inode(struct xfs_da_args *args, int space,
xfs_dir2_db_t *dbp);


@ -1540,6 +1540,13 @@ typedef struct xfs_bmdr_block {
#define BMBT_BLOCKCOUNT_BITLEN 21
#define BMBT_STARTOFF_MASK ((1ULL << BMBT_STARTOFF_BITLEN) - 1)
#define BMBT_BLOCKCOUNT_MASK ((1ULL << BMBT_BLOCKCOUNT_BITLEN) - 1)
/*
* bmbt records have a file offset (block) field that is 54 bits wide, so this
* is the largest xfs_fileoff_t that we ever expect to see.
*/
#define XFS_MAX_FILEOFF (BMBT_STARTOFF_MASK + BMBT_BLOCKCOUNT_MASK)
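As a quick sanity check (editor's sketch, not part of the patch), the new constant follows directly from the 54-bit startoff and 21-bit blockcount fields noted above; the SKETCH_* names below are local to this example:

#include <stdio.h>

/* Mirrors the bmbt field widths quoted above; illustration only. */
#define SKETCH_STARTOFF_BITLEN		54
#define SKETCH_BLOCKCOUNT_BITLEN	21

int main(void)
{
	unsigned long long startoff_mask = (1ULL << SKETCH_STARTOFF_BITLEN) - 1;
	unsigned long long blockcount_mask = (1ULL << SKETCH_BLOCKCOUNT_BITLEN) - 1;

	/* Largest file offset (in fs blocks) a single bmbt record can describe. */
	printf("XFS_MAX_FILEOFF = %llu blocks (~2^54)\n",
	       startoff_mask + blockcount_mask);
	return 0;
}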
typedef struct xfs_bmbt_rec {
__be64 l0, l1;


@ -25,22 +25,18 @@
#include "xfs_error.h"
/*
* Look at all the extents for this logical region,
* invalidate any buffers that are incore/in transactions.
* Invalidate any incore buffers associated with this remote attribute value
* extent. We never log remote attribute value buffers, which means that they
* won't be attached to a transaction and are therefore safe to mark stale.
* The actual bunmapi will be taken care of later.
*/
STATIC int
xfs_attr3_leaf_freextent(
struct xfs_trans **trans,
xfs_attr3_rmt_stale(
struct xfs_inode *dp,
xfs_dablk_t blkno,
int blkcnt)
{
struct xfs_bmbt_irec map;
struct xfs_buf *bp;
xfs_dablk_t tblkno;
xfs_daddr_t dblkno;
int tblkcnt;
int dblkcnt;
int nmap;
int error;
@ -48,47 +44,28 @@ xfs_attr3_leaf_freextent(
* Roll through the "value", invalidating the attribute value's
* blocks.
*/
tblkno = blkno;
tblkcnt = blkcnt;
while (tblkcnt > 0) {
while (blkcnt > 0) {
/*
* Try to remember where we decided to put the value.
*/
nmap = 1;
error = xfs_bmapi_read(dp, (xfs_fileoff_t)tblkno, tblkcnt,
error = xfs_bmapi_read(dp, (xfs_fileoff_t)blkno, blkcnt,
&map, &nmap, XFS_BMAPI_ATTRFORK);
if (error) {
if (error)
return error;
}
ASSERT(nmap == 1);
ASSERT(map.br_startblock != DELAYSTARTBLOCK);
/*
* If it's a hole, these are already unmapped
* so there's nothing to invalidate.
* Mark any incore buffers for the remote value as stale. We
* never log remote attr value buffers, so the buffer should be
* easy to kill.
*/
if (map.br_startblock != HOLESTARTBLOCK) {
error = xfs_attr_rmtval_stale(dp, &map, 0);
if (error)
return error;
dblkno = XFS_FSB_TO_DADDR(dp->i_mount,
map.br_startblock);
dblkcnt = XFS_FSB_TO_BB(dp->i_mount,
map.br_blockcount);
bp = xfs_trans_get_buf(*trans,
dp->i_mount->m_ddev_targp,
dblkno, dblkcnt, 0);
if (!bp)
return -ENOMEM;
xfs_trans_binval(*trans, bp);
/*
* Roll to next transaction.
*/
error = xfs_trans_roll_inode(trans, dp);
if (error)
return error;
}
tblkno += map.br_blockcount;
tblkcnt -= map.br_blockcount;
blkno += map.br_blockcount;
blkcnt -= map.br_blockcount;
}
return 0;
@ -102,86 +79,45 @@ xfs_attr3_leaf_freextent(
*/
STATIC int
xfs_attr3_leaf_inactive(
struct xfs_trans **trans,
struct xfs_inode *dp,
struct xfs_buf *bp)
struct xfs_trans **trans,
struct xfs_inode *dp,
struct xfs_buf *bp)
{
struct xfs_attr_leafblock *leaf;
struct xfs_attr3_icleaf_hdr ichdr;
struct xfs_attr_leaf_entry *entry;
struct xfs_attr3_icleaf_hdr ichdr;
struct xfs_mount *mp = bp->b_mount;
struct xfs_attr_leafblock *leaf = bp->b_addr;
struct xfs_attr_leaf_entry *entry;
struct xfs_attr_leaf_name_remote *name_rmt;
struct xfs_attr_inactive_list *list;
struct xfs_attr_inactive_list *lp;
int error;
int count;
int size;
int tmp;
int i;
struct xfs_mount *mp = bp->b_mount;
int error = 0;
int i;
leaf = bp->b_addr;
xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &ichdr, leaf);
/*
* Count the number of "remote" value extents.
* Find the remote value extents for this leaf and invalidate their
* incore buffers.
*/
count = 0;
entry = xfs_attr3_leaf_entryp(leaf);
for (i = 0; i < ichdr.count; entry++, i++) {
if (be16_to_cpu(entry->nameidx) &&
((entry->flags & XFS_ATTR_LOCAL) == 0)) {
name_rmt = xfs_attr3_leaf_name_remote(leaf, i);
if (name_rmt->valueblk)
count++;
}
int blkcnt;
if (!entry->nameidx || (entry->flags & XFS_ATTR_LOCAL))
continue;
name_rmt = xfs_attr3_leaf_name_remote(leaf, i);
if (!name_rmt->valueblk)
continue;
blkcnt = xfs_attr3_rmt_blocks(dp->i_mount,
be32_to_cpu(name_rmt->valuelen));
error = xfs_attr3_rmt_stale(dp,
be32_to_cpu(name_rmt->valueblk), blkcnt);
if (error)
goto err;
}
/*
* If there are no "remote" values, we're done.
*/
if (count == 0) {
xfs_trans_brelse(*trans, bp);
return 0;
}
/*
* Allocate storage for a list of all the "remote" value extents.
*/
size = count * sizeof(xfs_attr_inactive_list_t);
list = kmem_alloc(size, 0);
/*
* Identify each of the "remote" value extents.
*/
lp = list;
entry = xfs_attr3_leaf_entryp(leaf);
for (i = 0; i < ichdr.count; entry++, i++) {
if (be16_to_cpu(entry->nameidx) &&
((entry->flags & XFS_ATTR_LOCAL) == 0)) {
name_rmt = xfs_attr3_leaf_name_remote(leaf, i);
if (name_rmt->valueblk) {
lp->valueblk = be32_to_cpu(name_rmt->valueblk);
lp->valuelen = xfs_attr3_rmt_blocks(dp->i_mount,
be32_to_cpu(name_rmt->valuelen));
lp++;
}
}
}
xfs_trans_brelse(*trans, bp); /* unlock for trans. in freextent() */
/*
* Invalidate each of the "remote" value extents.
*/
error = 0;
for (lp = list, i = 0; i < count; i++, lp++) {
tmp = xfs_attr3_leaf_freextent(trans, dp,
lp->valueblk, lp->valuelen);
if (error == 0)
error = tmp; /* save only the 1st errno */
}
kmem_free(list);
xfs_trans_brelse(*trans, bp);
err:
return error;
}


@ -187,7 +187,12 @@ xfs_file_dio_aio_read(
file_accessed(iocb->ki_filp);
xfs_ilock(ip, XFS_IOLOCK_SHARED);
if (iocb->ki_flags & IOCB_NOWAIT) {
if (!xfs_ilock_nowait(ip, XFS_IOLOCK_SHARED))
return -EAGAIN;
} else {
xfs_ilock(ip, XFS_IOLOCK_SHARED);
}
ret = iomap_dio_rw(iocb, to, &xfs_iomap_ops, NULL);
xfs_iunlock(ip, XFS_IOLOCK_SHARED);


@ -1513,10 +1513,8 @@ xfs_itruncate_extents_flags(
struct xfs_mount *mp = ip->i_mount;
struct xfs_trans *tp = *tpp;
xfs_fileoff_t first_unmap_block;
xfs_fileoff_t last_block;
xfs_filblks_t unmap_len;
int error = 0;
int done = 0;
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
ASSERT(!atomic_read(&VFS_I(ip)->i_count) ||
@ -1536,21 +1534,22 @@ xfs_itruncate_extents_flags(
* the end of the file (in a crash where the space is allocated
* but the inode size is not yet updated), simply remove any
* blocks which show up between the new EOF and the maximum
* possible file size. If the first block to be removed is
* beyond the maximum file size (ie it is the same as last_block),
* then there is nothing to do.
* possible file size.
*
* We have to free all the blocks to the bmbt maximum offset, even if
* the page cache can't scale that far.
*/
first_unmap_block = XFS_B_TO_FSB(mp, (xfs_ufsize_t)new_size);
last_block = XFS_B_TO_FSB(mp, mp->m_super->s_maxbytes);
if (first_unmap_block == last_block)
if (first_unmap_block >= XFS_MAX_FILEOFF) {
WARN_ON_ONCE(first_unmap_block > XFS_MAX_FILEOFF);
return 0;
}
ASSERT(first_unmap_block < last_block);
unmap_len = last_block - first_unmap_block + 1;
while (!done) {
unmap_len = XFS_MAX_FILEOFF - first_unmap_block + 1;
while (unmap_len > 0) {
ASSERT(tp->t_firstblock == NULLFSBLOCK);
error = xfs_bunmapi(tp, ip, first_unmap_block, unmap_len, flags,
XFS_ITRUNC_MAX_EXTENTS, &done);
error = __xfs_bunmapi(tp, ip, first_unmap_block, &unmap_len,
flags, XFS_ITRUNC_MAX_EXTENTS);
if (error)
goto out;
@ -1570,7 +1569,7 @@ xfs_itruncate_extents_flags(
if (whichfork == XFS_DATA_FORK) {
/* Remove all pending CoW reservations. */
error = xfs_reflink_cancel_cow_blocks(ip, &tp,
first_unmap_block, last_block, true);
first_unmap_block, XFS_MAX_FILEOFF, true);
if (error)
goto out;


@ -1544,7 +1544,8 @@ xfs_reflink_clear_inode_flag(
* We didn't find any shared blocks so turn off the reflink flag.
* First, get rid of any leftover CoW mappings.
*/
error = xfs_reflink_cancel_cow_blocks(ip, tpp, 0, NULLFILEOFF, true);
error = xfs_reflink_cancel_cow_blocks(ip, tpp, 0, XFS_MAX_FILEOFF,
true);
if (error)
return error;


@ -512,32 +512,6 @@ xfs_showargs(
seq_puts(m, ",noquota");
}
static uint64_t
xfs_max_file_offset(
unsigned int blockshift)
{
unsigned int pagefactor = 1;
unsigned int bitshift = BITS_PER_LONG - 1;
/* Figure out maximum filesize, on Linux this can depend on
* the filesystem blocksize (on 32 bit platforms).
* __block_write_begin does this in an [unsigned] long long...
* page->index << (PAGE_SHIFT - bbits)
* So, for page sized blocks (4K on 32 bit platforms),
* this wraps at around 8Tb (hence MAX_LFS_FILESIZE which is
* (((u64)PAGE_SIZE << (BITS_PER_LONG-1))-1)
* but for smaller blocksizes it is less (bbits = log2 bsize).
*/
#if BITS_PER_LONG == 32
ASSERT(sizeof(sector_t) == 8);
pagefactor = PAGE_SIZE;
bitshift = BITS_PER_LONG;
#endif
return (((uint64_t)pagefactor) << bitshift) - 1;
}
/*
* Set parameters for inode allocation heuristics, taking into account
* filesystem size and inode32/inode64 mount options; i.e. specifically
@ -1650,6 +1624,26 @@ xfs_fs_fill_super(
if (error)
goto out_free_sb;
/*
* XFS block mappings use 54 bits to store the logical block offset.
* This should suffice to handle the maximum file size that the VFS
* supports (currently 2^63 bytes on 64-bit and ULONG_MAX << PAGE_SHIFT
* bytes on 32-bit), but as XFS and VFS have gotten the s_maxbytes
* calculation wrong on 32-bit kernels in the past, we'll add a WARN_ON
* to check this assertion.
*
* Avoid integer overflow by comparing the maximum bmbt offset to the
* maximum pagecache offset in units of fs blocks.
*/
if (XFS_B_TO_FSBT(mp, MAX_LFS_FILESIZE) > XFS_MAX_FILEOFF) {
xfs_warn(mp,
"MAX_LFS_FILESIZE block offset (%llu) exceeds extent map maximum (%llu)!",
XFS_B_TO_FSBT(mp, MAX_LFS_FILESIZE),
XFS_MAX_FILEOFF);
error = -EINVAL;
goto out_free_sb;
}
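A rough worked example of the check above (editor's sketch, assuming a 4k block size, i.e. blocklog = 12, on a 64-bit kernel where MAX_LFS_FILESIZE is on the order of 2^63 bytes as the comment says): the pagecache limit converts to roughly 2^51 fs blocks, comfortably below the ~2^54-block XFS_MAX_FILEOFF, so the mount-time check only trips if one of the two limits is miscomputed.

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	const unsigned int blocklog = 12;			/* assumed 4k fs blocks */
	const uint64_t max_lfs_filesize = INT64_MAX;		/* stand-in for 64-bit MAX_LFS_FILESIZE */
	const uint64_t xfs_max_fileoff =
		((1ULL << 54) - 1) + ((1ULL << 21) - 1);	/* XFS_MAX_FILEOFF from above */
	uint64_t pagecache_max_fsb = max_lfs_filesize >> blocklog;

	printf("pagecache max %llu blocks vs bmbt max %llu blocks -> %s\n",
	       (unsigned long long)pagecache_max_fsb,
	       (unsigned long long)xfs_max_fileoff,
	       pagecache_max_fsb > xfs_max_fileoff ? "mount would fail" : "ok");
	return 0;
}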
error = xfs_filestream_mount(mp);
if (error)
goto out_free_sb;
@ -1661,7 +1655,7 @@ xfs_fs_fill_super(
sb->s_magic = XFS_SUPER_MAGIC;
sb->s_blocksize = mp->m_sb.sb_blocksize;
sb->s_blocksize_bits = ffs(sb->s_blocksize) - 1;
sb->s_maxbytes = xfs_max_file_offset(sb->s_blocksize_bits);
sb->s_maxbytes = MAX_LFS_FILESIZE;
sb->s_max_links = XFS_MAXLINK;
sb->s_time_gran = 1;
sb->s_time_min = S32_MIN;


@ -68,6 +68,8 @@ extern ssize_t cpu_show_srbds(struct device *dev, struct device_attribute *attr,
extern ssize_t cpu_show_mmio_stale_data(struct device *dev,
struct device_attribute *attr,
char *buf);
extern ssize_t cpu_show_retbleed(struct device *dev,
struct device_attribute *attr, char *buf);
extern __printf(4, 5)
struct device *cpu_device_create(struct device *parent, void *drvdata,


@ -955,7 +955,7 @@ static inline void kvm_arch_end_assignment(struct kvm *kvm)
{
}
static inline bool kvm_arch_has_assigned_device(struct kvm *kvm)
static __always_inline bool kvm_arch_has_assigned_device(struct kvm *kvm)
{
return false;
}


@ -672,9 +672,7 @@ struct x86_cpu_id {
__u16 steppings;
};
#define X86_FEATURE_MATCH(x) \
{ X86_VENDOR_ANY, X86_FAMILY_ANY, X86_MODEL_ANY, x }
/* Wild cards for x86_cpu_id::vendor, family, model and feature */
#define X86_VENDOR_ANY 0xffff
#define X86_FAMILY_ANY 0
#define X86_MODEL_ANY 0


@ -50,6 +50,7 @@ KBUILD_CFLAGS += -Wno-sign-compare
KBUILD_CFLAGS += -Wno-format-zero-length
KBUILD_CFLAGS += $(call cc-disable-warning, pointer-to-enum-cast)
KBUILD_CFLAGS += $(call cc-disable-warning, unaligned-access)
KBUILD_CFLAGS += $(call cc-disable-warning, cast-function-type-strict)
endif
endif


@ -284,7 +284,7 @@
#define X86_FEATURE_CQM_MBM_LOCAL (11*32+ 3) /* LLC Local MBM monitoring */
#define X86_FEATURE_FENCE_SWAPGS_USER (11*32+ 4) /* "" LFENCE in user entry SWAPGS path */
#define X86_FEATURE_FENCE_SWAPGS_KERNEL (11*32+ 5) /* "" LFENCE in kernel entry SWAPGS path */
#define X86_FEATURE_RSB_VMEXIT_LITE (11*32+ 6) /* "" Fill RSB on VM-Exit when EIBRS is enabled */
#define X86_FEATURE_RSB_VMEXIT_LITE (11*32+17) /* "" Fill RSB on VM-Exit when EIBRS is enabled */
/* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */
#define X86_FEATURE_AVX512_BF16 (12*32+ 5) /* AVX512 BFLOAT16 instructions */