f3d2278a81
[ Upstream commit 77bbbc0cf84834ed130838f7ac1988567f4d0288 ] The POWER9 vCPU TLB management code assumes all threads in a core share a TLB, and that TLBIEL execued by one thread will invalidate TLBs for all threads. This is not the case for SMT8 capable POWER9 and POWER10 (big core) processors, where the TLB is split between groups of threads. This results in TLB multi-hits, random data corruption, etc. Fix this by introducing cpu_first_tlb_thread_sibling etc., to determine which siblings share TLBs, and use that in the guest TLB flushing code. [npiggin@gmail.com: add changelog and comment] Signed-off-by: Paul Mackerras <paulus@ozlabs.org> Signed-off-by: Nicholas Piggin <npiggin@gmail.com> Reviewed-by: Fabiano Rosas <farosas@linux.ibm.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/20210602040441.3984352-1-npiggin@gmail.com Signed-off-by: Sasha Levin <sashal@kernel.org>
150 lines
3.8 KiB
C
/* SPDX-License-Identifier: GPL-2.0 */
|
|
#ifndef _ASM_POWERPC_CPUTHREADS_H
|
|
#define _ASM_POWERPC_CPUTHREADS_H
|
|
|
|
#ifndef __ASSEMBLY__
|
|
#include <linux/cpumask.h>
|
|
#include <asm/cpu_has_feature.h>
|
|
|
|
/*
|
|
* Mapping of threads to cores
|
|
*
|
|
* Note: This implementation is limited to a power of 2 number of
|
|
* threads per core and the same number for each core in the system
|
|
* (though it would work if some processors had less threads as long
|
|
* as the CPU numbers are still allocated, just not brought online).
|
|
*
|
|
* However, the API allows for a different implementation in the future
|
|
* if needed, as long as you only use the functions and not the variables
|
|
* directly.
|
|
*/
|
|
|
|
#ifdef CONFIG_SMP
|
|
extern int threads_per_core;
|
|
extern int threads_per_subcore;
|
|
extern int threads_shift;
|
|
extern bool has_big_cores;
|
|
extern cpumask_t threads_core_mask;
|
|
#else
|
|
#define threads_per_core 1
|
|
#define threads_per_subcore 1
|
|
#define threads_shift 0
|
|
#define has_big_cores 0
|
|
#define threads_core_mask (*get_cpu_mask(0))
|
|
#endif
|
|
|
|
/* cpu_thread_mask_to_cores - Return a cpumask of one per cores
|
|
* hit by the argument
|
|
*
|
|
* @threads: a cpumask of online threads
|
|
*
|
|
* This function returns a cpumask which will have one online cpu's
|
|
* bit set for each core that has at least one thread set in the argument.
|
|
*
|
|
* This can typically be used for things like IPI for tlb invalidations
|
|
* since those need to be done only once per core/TLB
|
|
*/
|
|
static inline cpumask_t cpu_thread_mask_to_cores(const struct cpumask *threads)
|
|
{
|
|
cpumask_t tmp, res;
|
|
int i, cpu;
|
|
|
|
cpumask_clear(&res);
|
|
for (i = 0; i < NR_CPUS; i += threads_per_core) {
|
|
cpumask_shift_left(&tmp, &threads_core_mask, i);
|
|
if (cpumask_intersects(threads, &tmp)) {
|
|
cpu = cpumask_next_and(-1, &tmp, cpu_online_mask);
|
|
if (cpu < nr_cpu_ids)
|
|
cpumask_set_cpu(cpu, &res);
|
|
}
|
|
}
|
|
return res;
|
|
}
|
|
|
|
static inline int cpu_nr_cores(void)
|
|
{
|
|
return nr_cpu_ids >> threads_shift;
|
|
}
|
|
|
|
static inline cpumask_t cpu_online_cores_map(void)
|
|
{
|
|
return cpu_thread_mask_to_cores(cpu_online_mask);
|
|
}
|
|
|
|
#ifdef CONFIG_SMP
/* Implemented out of line: map a CPU number to its core index and back. */
int cpu_core_index_of_thread(int cpu);
int cpu_first_thread_of_core(int core);
#else
/* UP: one thread per core, so both mappings are the identity. */
static inline int cpu_core_index_of_thread(int cpu) { return cpu; }
static inline int cpu_first_thread_of_core(int core) { return core; }
#endif
|
|
|
|
static inline int cpu_thread_in_core(int cpu)
|
|
{
|
|
return cpu & (threads_per_core - 1);
|
|
}
|
|
|
|
static inline int cpu_thread_in_subcore(int cpu)
|
|
{
|
|
return cpu & (threads_per_subcore - 1);
|
|
}
|
|
|
|
static inline int cpu_first_thread_sibling(int cpu)
|
|
{
|
|
return cpu & ~(threads_per_core - 1);
|
|
}
|
|
|
|
static inline int cpu_last_thread_sibling(int cpu)
|
|
{
|
|
return cpu | (threads_per_core - 1);
|
|
}
|
|
|
|
/*
|
|
* tlb_thread_siblings are siblings which share a TLB. This is not
|
|
* architected, is not something a hypervisor could emulate and a future
|
|
* CPU may change behaviour even in compat mode, so this should only be
|
|
* used on PowerNV, and only with care.
|
|
*/
|
|
static inline int cpu_first_tlb_thread_sibling(int cpu)
|
|
{
|
|
if (cpu_has_feature(CPU_FTR_ARCH_300) && (threads_per_core == 8))
|
|
return cpu & ~0x6; /* Big Core */
|
|
else
|
|
return cpu_first_thread_sibling(cpu);
|
|
}
|
|
|
|
static inline int cpu_last_tlb_thread_sibling(int cpu)
|
|
{
|
|
if (cpu_has_feature(CPU_FTR_ARCH_300) && (threads_per_core == 8))
|
|
return cpu | 0x6; /* Big Core */
|
|
else
|
|
return cpu_last_thread_sibling(cpu);
|
|
}
|
|
|
|
static inline int cpu_tlb_thread_sibling_step(void)
|
|
{
|
|
if (cpu_has_feature(CPU_FTR_ARCH_300) && (threads_per_core == 8))
|
|
return 2; /* Big Core */
|
|
else
|
|
return 1;
|
|
}
|
|
|
|
/*
 * Read the thread enable status (TENSR) on Book3E SMT parts; on anything
 * else report a single enabled thread (bit 0 set).
 */
static inline u32 get_tensr(void)
{
	u32 enabled = 1;

#ifdef CONFIG_BOOKE
	if (cpu_has_feature(CPU_FTR_SMT))
		enabled = mfspr(SPRN_TENSR);
#endif
	return enabled;
}
|
|
|
|
/* Book3E SMT thread start/stop helpers; implemented out of line. */
void book3e_start_thread(int thread, unsigned long addr);
void book3e_stop_thread(int thread);
|
|
|
|
#endif /* __ASSEMBLY__ */
|
|
|
|
#define INVALID_THREAD_HWID 0x0fff
|
|
|
|
#endif /* _ASM_POWERPC_CPUTHREADS_H */
|
|
|