rcu: Introduce hlist_nulls variant of hlist
hlist uses NULL value to finish a chain.
The hlist_nulls variant uses the low-order bit, set to 1, to signal an end-of-list marker.
This allows storing many different end markers, so that some RCU lockless
algos (used in TCP/UDP stack for example) can save some memory barriers in
fast paths.
Two new files are added :
include/linux/list_nulls.h
- mimics hlist part of include/linux/list.h, derived to hlist_nulls variant
include/linux/rculist_nulls.h
- mimics hlist part of include/linux/rculist.h, derived to hlist_nulls variant
Only four helpers are declared for the moment :
hlist_nulls_del_init_rcu(), hlist_nulls_del_rcu(),
hlist_nulls_add_head_rcu() and hlist_nulls_for_each_entry_rcu()
prefetches() were removed, since the end of a list is no longer a NULL value.
prefetches() could trigger useless (and possibly dangerous) memory transactions.
Example of use (extracted from __udp4_lib_lookup())
struct sock *sk, *result;
struct hlist_nulls_node *node;
unsigned short hnum = ntohs(dport);
unsigned int hash = udp_hashfn(net, hnum);
struct udp_hslot *hslot = &udptable->hash[hash];
int score, badness;
rcu_read_lock();
begin:
result = NULL;
badness = -1;
sk_nulls_for_each_rcu(sk, node, &hslot->head) {
score = compute_score(sk, net, saddr, hnum, sport,
daddr, dport, dif);
if (score > badness) {
result = sk;
badness = score;
}
}
/*
* if the nulls value we got at the end of this lookup is
* not the expected one, we must restart lookup.
* We probably met an item that was moved to another chain.
*/
if (get_nulls_value(node) != hash)
goto begin;
if (result) {
if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt)))
result = NULL;
else if (unlikely(compute_score(result, net, saddr, hnum, sport,
daddr, dport, dif) < badness)) {
sock_put(result);
goto begin;
}
}
rcu_read_unlock();
return result;
Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: David S. Miller <davem@davemloft.net>
2008-11-16 22:37:55 -05:00
|
|
|
#ifndef _LINUX_RCULIST_NULLS_H
|
|
|
|
#define _LINUX_RCULIST_NULLS_H
|
|
|
|
|
|
|
|
#ifdef __KERNEL__
|
|
|
|
|
|
|
|
/*
|
|
|
|
* RCU-protected list version
|
|
|
|
*/
|
|
|
|
#include <linux/list_nulls.h>
|
|
|
|
#include <linux/rcupdate.h>
|
|
|
|
|
|
|
|
/**
|
|
|
|
* hlist_nulls_del_init_rcu - deletes entry from hash list with re-initialization
|
|
|
|
* @n: the element to delete from the hash list.
|
|
|
|
*
|
|
|
|
* Note: hlist_nulls_unhashed() on the node return true after this. It is
|
|
|
|
* useful for RCU based read lockfree traversal if the writer side
|
|
|
|
* must know if the list entry is still hashed or already unhashed.
|
|
|
|
*
|
|
|
|
* In particular, it means that we can not poison the forward pointers
|
|
|
|
* that may still be used for walking the hash list and we can only
|
|
|
|
* zero the pprev pointer so list_unhashed() will return true after
|
|
|
|
* this.
|
|
|
|
*
|
|
|
|
* The caller must take whatever precautions are necessary (such as
|
|
|
|
* holding appropriate locks) to avoid racing with another
|
|
|
|
* list-mutation primitive, such as hlist_nulls_add_head_rcu() or
|
|
|
|
* hlist_nulls_del_rcu(), running on this same list. However, it is
|
|
|
|
* perfectly legal to run concurrently with the _rcu list-traversal
|
|
|
|
* primitives, such as hlist_nulls_for_each_entry_rcu().
|
|
|
|
*/
|
|
|
|
static inline void hlist_nulls_del_init_rcu(struct hlist_nulls_node *n)
|
|
|
|
{
|
|
|
|
if (!hlist_nulls_unhashed(n)) {
|
|
|
|
__hlist_nulls_del(n);
|
|
|
|
n->pprev = NULL;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* hlist_nulls_del_rcu - deletes entry from hash list without re-initialization
|
|
|
|
* @n: the element to delete from the hash list.
|
|
|
|
*
|
|
|
|
* Note: hlist_nulls_unhashed() on entry does not return true after this,
|
|
|
|
* the entry is in an undefined state. It is useful for RCU based
|
|
|
|
* lockfree traversal.
|
|
|
|
*
|
|
|
|
* In particular, it means that we can not poison the forward
|
|
|
|
* pointers that may still be used for walking the hash list.
|
|
|
|
*
|
|
|
|
* The caller must take whatever precautions are necessary
|
|
|
|
* (such as holding appropriate locks) to avoid racing
|
|
|
|
* with another list-mutation primitive, such as hlist_nulls_add_head_rcu()
|
|
|
|
* or hlist_nulls_del_rcu(), running on this same list.
|
|
|
|
* However, it is perfectly legal to run concurrently with
|
|
|
|
* the _rcu list-traversal primitives, such as
|
|
|
|
* hlist_nulls_for_each_entry().
|
|
|
|
*/
|
|
|
|
static inline void hlist_nulls_del_rcu(struct hlist_nulls_node *n)
|
|
|
|
{
|
|
|
|
__hlist_nulls_del(n);
|
|
|
|
n->pprev = LIST_POISON2;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* hlist_nulls_add_head_rcu
|
|
|
|
* @n: the element to add to the hash list.
|
|
|
|
* @h: the list to add to.
|
|
|
|
*
|
|
|
|
* Description:
|
|
|
|
* Adds the specified element to the specified hlist_nulls,
|
|
|
|
* while permitting racing traversals.
|
|
|
|
*
|
|
|
|
* The caller must take whatever precautions are necessary
|
|
|
|
* (such as holding appropriate locks) to avoid racing
|
|
|
|
* with another list-mutation primitive, such as hlist_nulls_add_head_rcu()
|
|
|
|
* or hlist_nulls_del_rcu(), running on this same list.
|
|
|
|
* However, it is perfectly legal to run concurrently with
|
|
|
|
* the _rcu list-traversal primitives, such as
|
|
|
|
* hlist_nulls_for_each_entry_rcu(), used to prevent memory-consistency
|
|
|
|
* problems on Alpha CPUs. Regardless of the type of CPU, the
|
|
|
|
* list-traversal primitive must be guarded by rcu_read_lock().
|
|
|
|
*/
|
|
|
|
static inline void hlist_nulls_add_head_rcu(struct hlist_nulls_node *n,
|
|
|
|
struct hlist_nulls_head *h)
|
|
|
|
{
|
|
|
|
struct hlist_nulls_node *first = h->first;
|
|
|
|
|
|
|
|
n->next = first;
|
|
|
|
n->pprev = &h->first;
|
|
|
|
rcu_assign_pointer(h->first, n);
|
|
|
|
if (!is_a_nulls(first))
|
|
|
|
first->pprev = &n->next;
|
|
|
|
}
|
|
|
|
/**
 * hlist_nulls_for_each_entry_rcu - iterate over rcu list of given type
 * @tpos:	the type * to use as a loop cursor.
 * @pos:	the &struct hlist_nulls_node to use as a loop cursor.
 * @head:	the head for your list.
 * @member:	the name of the hlist_nulls_node within the struct.
 *
 * Starts at @head->first and follows the next pointers, fetching each
 * link with rcu_dereference().  On every iteration @tpos is set to the
 * entry containing @pos.  The walk stops as soon as is_a_nulls(@pos) is
 * true, so when the loop ends normally @pos holds the end-of-list nulls
 * marker that was reached, and @tpos is not updated for it.
 */
#define hlist_nulls_for_each_entry_rcu(tpos, pos, head, member)		\
	for (pos = rcu_dereference((head)->first);				\
	     !is_a_nulls(pos) &&						\
	     ({ tpos = hlist_nulls_entry(pos, typeof(*tpos), member); 1; });	\
	     pos = rcu_dereference(pos->next))
|
|
|
|
|
|
|
|
#endif
|
|
|
|
#endif
|