e192832869
Pull locking updates from Ingo Molnar: "The main changes in this cycle are: - rwsem scalability improvements, phase #2, by Waiman Long, which are rather impressive: "On a 2-socket 40-core 80-thread Skylake system with 40 reader and writer locking threads, the min/mean/max locking operations done in a 5-second testing window before the patchset were: 40 readers, Iterations Min/Mean/Max = 1,807/1,808/1,810 40 writers, Iterations Min/Mean/Max = 1,807/50,344/151,255 After the patchset, they became: 40 readers, Iterations Min/Mean/Max = 30,057/31,359/32,741 40 writers, Iterations Min/Mean/Max = 94,466/95,845/97,098" There's a lot of changes to the locking implementation that makes it similar to qrwlock, including owner handoff for more fair locking. Another microbenchmark shows how across the spectrum the improvements are: "With a locking microbenchmark running on 5.1 based kernel, the total locking rates (in kops/s) on a 2-socket Skylake system with equal numbers of readers and writers (mixed) before and after this patchset were: # of Threads Before Patch After Patch ------------ ------------ ----------- 2 2,618 4,193 4 1,202 3,726 8 802 3,622 16 729 3,359 32 319 2,826 64 102 2,744" The changes are extensive and the patch-set has been through several iterations addressing various locking workloads. There might be more regressions, but unless they are pathological I believe we want to use this new implementation as the baseline going forward. - jump-label optimizations by Daniel Bristot de Oliveira: the primary motivation was to remove IPI disturbance of isolated RT-workload CPUs, which resulted in the implementation of batched jump-label updates. Beyond the improvement of the real-time characteristics kernel, in one test this patchset improved static key update overhead from 57 msecs to just 1.4 msecs - which is a nice speedup as well. 
- atomic64_t cross-arch type cleanups by Mark Rutland: over the last ~10 years of atomic64_t existence the various types used by the APIs only had to be self-consistent within each architecture - which means they became wildly inconsistent across architectures. Mark puts an end to this by reworking all the atomic64 implementations to use 's64' as the base type for atomic64_t, and to ensure that this type is consistently used for parameters and return values in the API, avoiding further problems in this area. - A large set of small improvements to lockdep by Yuyang Du: type cleanups, output cleanups, function return type and other cleanups all around the place. - A set of percpu ops cleanups and fixes by Peter Zijlstra. - Misc other changes - please see the Git log for more details" * 'locking-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (82 commits) locking/lockdep: increase size of counters for lockdep statistics locking/atomics: Use sed(1) instead of non-standard head(1) option locking/lockdep: Move mark_lock() inside CONFIG_TRACE_IRQFLAGS && CONFIG_PROVE_LOCKING x86/jump_label: Make tp_vec_nr static x86/percpu: Optimize raw_cpu_xchg() x86/percpu, sched/fair: Avoid local_clock() x86/percpu, x86/irq: Relax {set,get}_irq_regs() x86/percpu: Relax smp_processor_id() x86/percpu: Differentiate this_cpu_{}() and __this_cpu_{}() locking/rwsem: Guard against making count negative locking/rwsem: Adaptive disabling of reader optimistic spinning locking/rwsem: Enable time-based spinning on reader-owned rwsem locking/rwsem: Make rwsem->owner an atomic_long_t locking/rwsem: Enable readers spinning on writer locking/rwsem: Clarify usage of owner's nonspinaable bit locking/rwsem: Wake up almost all readers in wait queue locking/rwsem: More optimal RT task handling of null owner locking/rwsem: Always release wait_lock before waking up tasks locking/rwsem: Implement lock handoff to prevent lock starvation locking/rwsem: Make rwsem_spin_on_owner() 
return owner state ...
139 lines
3.9 KiB
C
139 lines
3.9 KiB
C
/* SPDX-License-Identifier: GPL-2.0 */
|
|
#ifndef _LINUX_PERCPU_RWSEM_H
|
|
#define _LINUX_PERCPU_RWSEM_H
|
|
|
|
#include <linux/atomic.h>
|
|
#include <linux/rwsem.h>
|
|
#include <linux/percpu.h>
|
|
#include <linux/rcuwait.h>
|
|
#include <linux/rcu_sync.h>
|
|
#include <linux/lockdep.h>
|
|
|
|
struct percpu_rw_semaphore {
|
|
struct rcu_sync rss;
|
|
unsigned int __percpu *read_count;
|
|
struct rw_semaphore rw_sem; /* slowpath */
|
|
struct rcuwait writer; /* blocked writer */
|
|
int readers_block;
|
|
};
|
|
|
|
/*
 * Define a struct percpu_rw_semaphore called @name together with the
 * per-CPU counter that backs its read_count.
 *
 * @is_static is pasted in front of the semaphore definition as its storage
 * class and may be empty.  The backing counter is always static.  Members
 * without an explicit initializer (readers_block) start out as zero.
 */
#define __DEFINE_PERCPU_RWSEM(name, is_static)				\
static DEFINE_PER_CPU(unsigned int, __percpu_rwsem_rc_##name);		\
is_static struct percpu_rw_semaphore name = {				\
	.rss		= __RCU_SYNC_INITIALIZER(name.rss),		\
	.read_count	= &__percpu_rwsem_rc_##name,			\
	.rw_sem		= __RWSEM_INITIALIZER(name.rw_sem),		\
	.writer		= __RCUWAIT_INITIALIZER(name.writer),		\
}
|
|
/* Define a percpu_rw_semaphore @name with no storage-class specifier. */
#define DEFINE_PERCPU_RWSEM(name)	__DEFINE_PERCPU_RWSEM(name, /* not static */)
|
|
/* Define a percpu_rw_semaphore @name with static storage class. */
#define DEFINE_STATIC_PERCPU_RWSEM(name)	__DEFINE_PERCPU_RWSEM(name, static)
|
|
|
|
/*
 * Out-of-line reader helpers, used by the inline wrappers in this header
 * whenever rcu_sync_is_idle() reports false.
 *
 * __percpu_down_read() is called with @try == false from percpu_down_read()
 * and with @try == true from percpu_down_read_trylock(); the latter treats a
 * non-zero return value as "lock acquired".
 */
extern int __percpu_down_read(struct percpu_rw_semaphore *sem, int try);
extern void __percpu_up_read(struct percpu_rw_semaphore *sem);
|
|
|
|
static inline void percpu_down_read(struct percpu_rw_semaphore *sem)
|
|
{
|
|
might_sleep();
|
|
|
|
rwsem_acquire_read(&sem->rw_sem.dep_map, 0, 0, _RET_IP_);
|
|
|
|
preempt_disable();
|
|
/*
|
|
* We are in an RCU-sched read-side critical section, so the writer
|
|
* cannot both change sem->state from readers_fast and start checking
|
|
* counters while we are here. So if we see !sem->state, we know that
|
|
* the writer won't be checking until we're past the preempt_enable()
|
|
* and that once the synchronize_rcu() is done, the writer will see
|
|
* anything we did within this RCU-sched read-size critical section.
|
|
*/
|
|
__this_cpu_inc(*sem->read_count);
|
|
if (unlikely(!rcu_sync_is_idle(&sem->rss)))
|
|
__percpu_down_read(sem, false); /* Unconditional memory barrier */
|
|
/*
|
|
* The preempt_enable() prevents the compiler from
|
|
* bleeding the critical section out.
|
|
*/
|
|
preempt_enable();
|
|
}
|
|
|
|
static inline int percpu_down_read_trylock(struct percpu_rw_semaphore *sem)
|
|
{
|
|
int ret = 1;
|
|
|
|
preempt_disable();
|
|
/*
|
|
* Same as in percpu_down_read().
|
|
*/
|
|
__this_cpu_inc(*sem->read_count);
|
|
if (unlikely(!rcu_sync_is_idle(&sem->rss)))
|
|
ret = __percpu_down_read(sem, true); /* Unconditional memory barrier */
|
|
preempt_enable();
|
|
/*
|
|
* The barrier() from preempt_enable() prevents the compiler from
|
|
* bleeding the critical section out.
|
|
*/
|
|
|
|
if (ret)
|
|
rwsem_acquire_read(&sem->rw_sem.dep_map, 0, 1, _RET_IP_);
|
|
|
|
return ret;
|
|
}
|
|
|
|
static inline void percpu_up_read(struct percpu_rw_semaphore *sem)
|
|
{
|
|
preempt_disable();
|
|
/*
|
|
* Same as in percpu_down_read().
|
|
*/
|
|
if (likely(rcu_sync_is_idle(&sem->rss)))
|
|
__this_cpu_dec(*sem->read_count);
|
|
else
|
|
__percpu_up_read(sem); /* Unconditional memory barrier */
|
|
preempt_enable();
|
|
|
|
rwsem_release(&sem->rw_sem.dep_map, 1, _RET_IP_);
|
|
}
|
|
|
|
/*
 * Out-of-line entry points; their implementations are not part of this
 * header.
 */
extern void percpu_down_write(struct percpu_rw_semaphore *sem);
extern void percpu_up_write(struct percpu_rw_semaphore *sem);

/*
 * Use the percpu_init_rwsem() wrapper rather than calling this directly:
 * it supplies the name string and the lock class key.
 */
extern int __percpu_init_rwsem(struct percpu_rw_semaphore *sem,
			       const char *name, struct lock_class_key *key);

extern void percpu_free_rwsem(struct percpu_rw_semaphore *sem);
|
|
|
|
/*
 * Initialize @sem at run time.
 *
 * Every expansion site gets its own static lock_class_key, and the
 * stringified @sem expression is passed as the name.  The statement
 * expression evaluates to the return value of __percpu_init_rwsem().
 */
#define percpu_init_rwsem(sem)						\
({									\
	static struct lock_class_key rwsem_key;				\
	__percpu_init_rwsem(sem, #sem, &rwsem_key);			\
})
|
|
|
|
/* Lockdep query on the rw_sem embedded in @sem. */
#define percpu_rwsem_is_held(sem)					\
	lockdep_is_held(&(sem)->rw_sem)
|
|
|
|
/* Lockdep assertion on the rw_sem embedded in @sem. */
#define percpu_rwsem_assert_held(sem)	lockdep_assert_held(&(sem)->rw_sem)
|
|
|
|
static inline void percpu_rwsem_release(struct percpu_rw_semaphore *sem,
|
|
bool read, unsigned long ip)
|
|
{
|
|
lock_release(&sem->rw_sem.dep_map, 1, ip);
|
|
#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
|
|
if (!read)
|
|
atomic_long_set(&sem->rw_sem.owner, RWSEM_OWNER_UNKNOWN);
|
|
#endif
|
|
}
|
|
|
|
static inline void percpu_rwsem_acquire(struct percpu_rw_semaphore *sem,
|
|
bool read, unsigned long ip)
|
|
{
|
|
lock_acquire(&sem->rw_sem.dep_map, 0, 1, read, 1, NULL, ip);
|
|
#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
|
|
if (!read)
|
|
atomic_long_set(&sem->rw_sem.owner, (long)current);
|
|
#endif
|
|
}
|
|
|
|
#endif
|