rcubarrier.txt
- RCU and Unloadable Modules
rculist_nulls.txt
- - RCU list primitives for use with SLAB_DESTROY_BY_RCU
+ - RCU list primitives for use with SLAB_TYPESAFE_BY_RCU
rcuref.txt
- Reference-count design for elements of lists/arrays protected by RCU
rcu.txt
Using hlist_nulls to protect read-mostly linked lists and
-objects using SLAB_DESTROY_BY_RCU allocations.
+objects using SLAB_TYPESAFE_BY_RCU allocations.
Please read the basics in Documentation/RCU/listRCU.txt
to solve following problem :
A typical RCU linked list managing objects which are
-allocated with SLAB_DESTROY_BY_RCU kmem_cache can
+allocated with SLAB_TYPESAFE_BY_RCU kmem_cache can
use following algos :
1) Lookup algo
3) Remove algo
--------------
Nothing special here, we can use a standard RCU hlist deletion.
-But thanks to SLAB_DESTROY_BY_RCU, beware a deleted object can be reused
+But thanks to SLAB_TYPESAFE_BY_RCU, beware a deleted object can be reused
very very fast (before the end of RCU grace period)
if (put_last_reference_on(obj) {
e. Is your workload too update-intensive for normal use of
RCU, but inappropriate for other synchronization mechanisms?
- If so, consider SLAB_DESTROY_BY_RCU. But please be careful!
+ If so, consider SLAB_TYPESAFE_BY_RCU (which was originally
+ named SLAB_DESTROY_BY_RCU). But please be careful!
f. Do you need read-side critical sections that are respected
even though they are in the middle of the idle loop, during
dev_priv->requests = KMEM_CACHE(drm_i915_gem_request,
SLAB_HWCACHE_ALIGN |
SLAB_RECLAIM_ACCOUNT |
- SLAB_DESTROY_BY_RCU);
+ SLAB_TYPESAFE_BY_RCU);
if (!dev_priv->requests)
goto err_vmas;
__i915_gem_active_get_rcu(const struct i915_gem_active *active)
{
/* Performing a lockless retrieval of the active request is super
- * tricky. SLAB_DESTROY_BY_RCU merely guarantees that the backing
+ * tricky. SLAB_TYPESAFE_BY_RCU merely guarantees that the backing
* slab of request objects will not be freed whilst we hold the
* RCU read lock. It does not guarantee that the request itself
* will not be freed and then *reused*. Viz,
ldlm_lock_slab = kmem_cache_create("ldlm_locks",
sizeof(struct ldlm_lock), 0,
SLAB_HWCACHE_ALIGN |
- SLAB_DESTROY_BY_RCU, NULL);
+ SLAB_TYPESAFE_BY_RCU, NULL);
if (!ldlm_lock_slab) {
kmem_cache_destroy(ldlm_resource_slab);
return -ENOMEM;
jbd2_journal_head_cache = kmem_cache_create("jbd2_journal_head",
sizeof(struct journal_head),
0, /* offset */
- SLAB_TEMPORARY | SLAB_DESTROY_BY_RCU,
+ SLAB_TEMPORARY | SLAB_TYPESAFE_BY_RCU,
NULL); /* ctor */
retval = 0;
if (!jbd2_journal_head_cache) {
/*
* The lockless check can race with remove_wait_queue() in progress,
* but in this case its caller should run under rcu_read_lock() and
- * sighand_cachep is SLAB_DESTROY_BY_RCU, we can safely return.
+ * sighand_cachep is SLAB_TYPESAFE_BY_RCU, we can safely return.
*/
if (likely(!waitqueue_active(wqh)))
return;
*
* Function returns NULL if no refcount could be obtained, or the fence.
* This function handles acquiring a reference to a fence that may be
- * reallocated within the RCU grace period (such as with SLAB_DESTROY_BY_RCU),
+ * reallocated within the RCU grace period (such as with SLAB_TYPESAFE_BY_RCU),
* so long as the caller is using RCU on the pointer to the fence.
*
* An alternative mechanism is to employ a seqlock to protect a bunch of
* have successfully acquire a reference to it. If it no
* longer matches, we are holding a reference to some other
* reallocated pointer. This is possible if the allocator
- * is using a freelist like SLAB_DESTROY_BY_RCU where the
+ * is using a freelist like SLAB_TYPESAFE_BY_RCU where the
* fence remains valid for the RCU grace period, but it
* may be reallocated. When using such allocators, we are
* responsible for ensuring the reference we get is to
#define SLAB_STORE_USER 0x00010000UL /* DEBUG: Store the last owner for bug hunting */
#define SLAB_PANIC 0x00040000UL /* Panic if kmem_cache_create() fails */
/*
- * SLAB_DESTROY_BY_RCU - **WARNING** READ THIS!
+ * SLAB_TYPESAFE_BY_RCU - **WARNING** READ THIS!
*
* This delays freeing the SLAB page by a grace period, it does _NOT_
* delay object freeing. This means that if you do kmem_cache_free()
*
* rcu_read_lock before reading the address, then rcu_read_unlock after
* taking the spinlock within the structure expected at that address.
+ *
+ * Note that SLAB_TYPESAFE_BY_RCU was originally named SLAB_DESTROY_BY_RCU.
*/
-#define SLAB_DESTROY_BY_RCU 0x00080000UL /* Defer freeing slabs to RCU */
+#define SLAB_TYPESAFE_BY_RCU 0x00080000UL /* Defer freeing slabs to RCU */
#define SLAB_MEM_SPREAD 0x00100000UL /* Spread some memory over cpuset */
#define SLAB_TRACE 0x00200000UL /* Trace allocations and frees */
struct module;
/*
- * caches using SLAB_DESTROY_BY_RCU should let .next pointer from nulls nodes
+ * caches using SLAB_TYPESAFE_BY_RCU should let .next pointer from nulls nodes
* un-modified. Special care is taken when initializing object to zero.
*/
static inline void sk_prot_clear_nulls(struct sock *sk, int size)
if (atomic_dec_and_test(&sighand->count)) {
signalfd_cleanup(sighand);
/*
- * sighand_cachep is SLAB_DESTROY_BY_RCU so we can free it
+ * sighand_cachep is SLAB_TYPESAFE_BY_RCU so we can free it
* without an RCU grace period, see __lock_task_sighand().
*/
kmem_cache_free(sighand_cachep, sighand);
{
sighand_cachep = kmem_cache_create("sighand_cache",
sizeof(struct sighand_struct), 0,
- SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_DESTROY_BY_RCU|
+ SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_TYPESAFE_BY_RCU|
SLAB_NOTRACK|SLAB_ACCOUNT, sighand_ctor);
signal_cachep = kmem_cache_create("signal_cache",
sizeof(struct signal_struct), 0,
}
/*
* This sighand can be already freed and even reused, but
- * we rely on SLAB_DESTROY_BY_RCU and sighand_ctor() which
+ * we rely on SLAB_TYPESAFE_BY_RCU and sighand_ctor() which
* initializes ->siglock: this slab can't go away, it has
* the same object type, ->siglock can't be reinitialized.
*
*size += sizeof(struct kasan_alloc_meta);
/* Add free meta. */
- if (cache->flags & SLAB_DESTROY_BY_RCU || cache->ctor ||
+ if (cache->flags & SLAB_TYPESAFE_BY_RCU || cache->ctor ||
cache->object_size < sizeof(struct kasan_free_meta)) {
cache->kasan_info.free_meta_offset = *size;
*size += sizeof(struct kasan_free_meta);
unsigned long rounded_up_size = round_up(size, KASAN_SHADOW_SCALE_SIZE);
/* RCU slabs could be legally used after free within the RCU period */
- if (unlikely(cache->flags & SLAB_DESTROY_BY_RCU))
+ if (unlikely(cache->flags & SLAB_TYPESAFE_BY_RCU))
return;
kasan_poison_shadow(object, rounded_up_size, KASAN_KMALLOC_FREE);
s8 shadow_byte;
/* RCU slabs could be legally used after free within the RCU period */
- if (unlikely(cache->flags & SLAB_DESTROY_BY_RCU))
+ if (unlikely(cache->flags & SLAB_TYPESAFE_BY_RCU))
return false;
shadow_byte = READ_ONCE(*(s8 *)kasan_mem_to_shadow(object));
void kmemcheck_slab_free(struct kmem_cache *s, void *object, size_t size)
{
/* TODO: RCU freeing is unsupported for now; hide false positives. */
- if (!s->ctor && !(s->flags & SLAB_DESTROY_BY_RCU))
+ if (!s->ctor && !(s->flags & SLAB_TYPESAFE_BY_RCU))
kmemcheck_mark_freed(object, size);
}
void __init anon_vma_init(void)
{
anon_vma_cachep = kmem_cache_create("anon_vma", sizeof(struct anon_vma),
- 0, SLAB_DESTROY_BY_RCU|SLAB_PANIC|SLAB_ACCOUNT,
+ 0, SLAB_TYPESAFE_BY_RCU|SLAB_PANIC|SLAB_ACCOUNT,
anon_vma_ctor);
anon_vma_chain_cachep = KMEM_CACHE(anon_vma_chain,
SLAB_PANIC|SLAB_ACCOUNT);
* If this page is still mapped, then its anon_vma cannot have been
* freed. But if it has been unmapped, we have no security against the
* anon_vma structure being freed and reused (for another anon_vma:
- * SLAB_DESTROY_BY_RCU guarantees that - so the atomic_inc_not_zero()
+ * SLAB_TYPESAFE_BY_RCU guarantees that - so the atomic_inc_not_zero()
* above cannot corrupt).
*/
if (!page_mapped(page)) {
freelist = page->freelist;
slab_destroy_debugcheck(cachep, page);
- if (unlikely(cachep->flags & SLAB_DESTROY_BY_RCU))
+ if (unlikely(cachep->flags & SLAB_TYPESAFE_BY_RCU))
call_rcu(&page->rcu_head, kmem_rcu_free);
else
kmem_freepages(cachep, page);
cachep->num = 0;
- if (cachep->ctor || flags & SLAB_DESTROY_BY_RCU)
+ if (cachep->ctor || flags & SLAB_TYPESAFE_BY_RCU)
return false;
left = calculate_slab_order(cachep, size,
if (size < 4096 || fls(size - 1) == fls(size-1 + REDZONE_ALIGN +
2 * sizeof(unsigned long long)))
flags |= SLAB_RED_ZONE | SLAB_STORE_USER;
- if (!(flags & SLAB_DESTROY_BY_RCU))
+ if (!(flags & SLAB_TYPESAFE_BY_RCU))
flags |= SLAB_POISON;
#endif
#endif
/* Legal flag mask for kmem_cache_create(), for various configurations */
#define SLAB_CORE_FLAGS (SLAB_HWCACHE_ALIGN | SLAB_CACHE_DMA | SLAB_PANIC | \
- SLAB_DESTROY_BY_RCU | SLAB_DEBUG_OBJECTS )
+ SLAB_TYPESAFE_BY_RCU | SLAB_DEBUG_OBJECTS )
#if defined(CONFIG_DEBUG_SLAB)
#define SLAB_DEBUG_FLAGS (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER)
* back there or track user information then we can
* only use the space before that information.
*/
- if (s->flags & (SLAB_DESTROY_BY_RCU | SLAB_STORE_USER))
+ if (s->flags & (SLAB_TYPESAFE_BY_RCU | SLAB_STORE_USER))
return s->inuse;
/*
* Else we can use all the padding etc for the allocation
* Set of flags that will prevent slab merging
*/
#define SLAB_NEVER_MERGE (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER | \
- SLAB_TRACE | SLAB_DESTROY_BY_RCU | SLAB_NOLEAKTRACE | \
+ SLAB_TRACE | SLAB_TYPESAFE_BY_RCU | SLAB_NOLEAKTRACE | \
SLAB_FAILSLAB | SLAB_KASAN)
#define SLAB_MERGE_SAME (SLAB_RECLAIM_ACCOUNT | SLAB_CACHE_DMA | \
struct kmem_cache *s, *s2;
/*
- * On destruction, SLAB_DESTROY_BY_RCU kmem_caches are put on the
+ * On destruction, SLAB_TYPESAFE_BY_RCU kmem_caches are put on the
* @slab_caches_to_rcu_destroy list. The slab pages are freed
* through RCU and and the associated kmem_cache are dereferenced
* while freeing the pages, so the kmem_caches should be freed only
memcg_unlink_cache(s);
list_del(&s->list);
- if (s->flags & SLAB_DESTROY_BY_RCU) {
+ if (s->flags & SLAB_TYPESAFE_BY_RCU) {
list_add_tail(&s->list, &slab_caches_to_rcu_destroy);
schedule_work(&slab_caches_to_rcu_destroy_work);
} else {
/*
* struct slob_rcu is inserted at the tail of allocated slob blocks, which
- * were created with a SLAB_DESTROY_BY_RCU slab. slob_rcu is used to free
+ * were created with a SLAB_TYPESAFE_BY_RCU slab. slob_rcu is used to free
* the block using call_rcu.
*/
struct slob_rcu {
int __kmem_cache_create(struct kmem_cache *c, unsigned long flags)
{
- if (flags & SLAB_DESTROY_BY_RCU) {
+ if (flags & SLAB_TYPESAFE_BY_RCU) {
/* leave room for rcu footer at the end of object */
c->size += sizeof(struct slob_rcu);
}
void kmem_cache_free(struct kmem_cache *c, void *b)
{
kmemleak_free_recursive(b, c->flags);
- if (unlikely(c->flags & SLAB_DESTROY_BY_RCU)) {
+ if (unlikely(c->flags & SLAB_TYPESAFE_BY_RCU)) {
struct slob_rcu *slob_rcu;
slob_rcu = b + (c->size - sizeof(struct slob_rcu));
slob_rcu->size = c->size;
static void free_slab(struct kmem_cache *s, struct page *page)
{
- if (unlikely(s->flags & SLAB_DESTROY_BY_RCU)) {
+ if (unlikely(s->flags & SLAB_TYPESAFE_BY_RCU)) {
struct rcu_head *head;
if (need_reserve_slab_rcu) {
* slab_free_freelist_hook() could have put the items into quarantine.
* If so, no need to free them.
*/
- if (s->flags & SLAB_KASAN && !(s->flags & SLAB_DESTROY_BY_RCU))
+ if (s->flags & SLAB_KASAN && !(s->flags & SLAB_TYPESAFE_BY_RCU))
return;
do_slab_free(s, page, head, tail, cnt, addr);
}
* the slab may touch the object after free or before allocation
* then we should never poison the object itself.
*/
- if ((flags & SLAB_POISON) && !(flags & SLAB_DESTROY_BY_RCU) &&
+ if ((flags & SLAB_POISON) && !(flags & SLAB_TYPESAFE_BY_RCU) &&
!s->ctor)
s->flags |= __OBJECT_POISON;
else
*/
s->inuse = size;
- if (((flags & (SLAB_DESTROY_BY_RCU | SLAB_POISON)) ||
+ if (((flags & (SLAB_TYPESAFE_BY_RCU | SLAB_POISON)) ||
s->ctor)) {
/*
* Relocate free pointer after the object if it is not
s->flags = kmem_cache_flags(s->size, flags, s->name, s->ctor);
s->reserved = 0;
- if (need_reserve_slab_rcu && (s->flags & SLAB_DESTROY_BY_RCU))
+ if (need_reserve_slab_rcu && (s->flags & SLAB_TYPESAFE_BY_RCU))
s->reserved = sizeof(struct rcu_head);
if (!calculate_sizes(s, -1))
static ssize_t destroy_by_rcu_show(struct kmem_cache *s, char *buf)
{
- return sprintf(buf, "%d\n", !!(s->flags & SLAB_DESTROY_BY_RCU));
+ return sprintf(buf, "%d\n", !!(s->flags & SLAB_TYPESAFE_BY_RCU));
}
SLAB_ATTR_RO(destroy_by_rcu);
.orphan_count = &dccp_orphan_count,
.max_header = MAX_DCCP_HEADER,
.obj_size = sizeof(struct dccp_sock),
- .slab_flags = SLAB_DESTROY_BY_RCU,
+ .slab_flags = SLAB_TYPESAFE_BY_RCU,
.rsk_prot = &dccp_request_sock_ops,
.twsk_prot = &dccp_timewait_sock_ops,
.h.hashinfo = &dccp_hashinfo,
.orphan_count = &dccp_orphan_count,
.max_header = MAX_DCCP_HEADER,
.obj_size = sizeof(struct dccp6_sock),
- .slab_flags = SLAB_DESTROY_BY_RCU,
+ .slab_flags = SLAB_TYPESAFE_BY_RCU,
.rsk_prot = &dccp6_request_sock_ops,
.twsk_prot = &dccp6_timewait_sock_ops,
.h.hashinfo = &dccp_hashinfo,
.sysctl_rmem = sysctl_tcp_rmem,
.max_header = MAX_TCP_HEADER,
.obj_size = sizeof(struct tcp_sock),
- .slab_flags = SLAB_DESTROY_BY_RCU,
+ .slab_flags = SLAB_TYPESAFE_BY_RCU,
.twsk_prot = &tcp_timewait_sock_ops,
.rsk_prot = &tcp_request_sock_ops,
.h.hashinfo = &tcp_hashinfo,
.sysctl_rmem = sysctl_tcp_rmem,
.max_header = MAX_TCP_HEADER,
.obj_size = sizeof(struct tcp6_sock),
- .slab_flags = SLAB_DESTROY_BY_RCU,
+ .slab_flags = SLAB_TYPESAFE_BY_RCU,
.twsk_prot = &tcp6_timewait_sock_ops,
.rsk_prot = &tcp6_request_sock_ops,
.h.hashinfo = &tcp_hashinfo,
.name = "LLC",
.owner = THIS_MODULE,
.obj_size = sizeof(struct llc_sock),
- .slab_flags = SLAB_DESTROY_BY_RCU,
+ .slab_flags = SLAB_TYPESAFE_BY_RCU,
};
/**
again:
sk_nulls_for_each_rcu(rc, node, laddr_hb) {
if (llc_estab_match(sap, daddr, laddr, rc)) {
- /* Extra checks required by SLAB_DESTROY_BY_RCU */
+ /* Extra checks required by SLAB_TYPESAFE_BY_RCU */
if (unlikely(!atomic_inc_not_zero(&rc->sk_refcnt)))
goto again;
if (unlikely(llc_sk(rc)->sap != sap ||
again:
sk_nulls_for_each_rcu(rc, node, laddr_hb) {
if (llc_listener_match(sap, laddr, rc)) {
- /* Extra checks required by SLAB_DESTROY_BY_RCU */
+ /* Extra checks required by SLAB_TYPESAFE_BY_RCU */
if (unlikely(!atomic_inc_not_zero(&rc->sk_refcnt)))
goto again;
if (unlikely(llc_sk(rc)->sap != sap ||
again:
sk_nulls_for_each_rcu(rc, node, laddr_hb) {
if (llc_dgram_match(sap, laddr, rc)) {
- /* Extra checks required by SLAB_DESTROY_BY_RCU */
+ /* Extra checks required by SLAB_TYPESAFE_BY_RCU */
if (unlikely(!atomic_inc_not_zero(&rc->sk_refcnt)))
goto again;
if (unlikely(llc_sk(rc)->sap != sap ||
continue;
/* kill only if still in same netns -- might have moved due to
- * SLAB_DESTROY_BY_RCU rules.
+ * SLAB_TYPESAFE_BY_RCU rules.
*
* We steal the timer reference. If that fails timer has
* already fired or someone else deleted it. Just drop ref
/*
* Do not use kmem_cache_zalloc(), as this cache uses
- * SLAB_DESTROY_BY_RCU.
+ * SLAB_TYPESAFE_BY_RCU.
*/
ct = kmem_cache_alloc(nf_conntrack_cachep, gfp);
if (ct == NULL)
struct net *net = nf_ct_net(ct);
/* A freed object has refcnt == 0, that's
- * the golden rule for SLAB_DESTROY_BY_RCU
+ * the golden rule for SLAB_TYPESAFE_BY_RCU
*/
NF_CT_ASSERT(atomic_read(&ct->ct_general.use) == 0);
nf_conntrack_cachep = kmem_cache_create("nf_conntrack",
sizeof(struct nf_conn),
NFCT_INFOMASK + 1,
- SLAB_DESTROY_BY_RCU | SLAB_HWCACHE_ALIGN, NULL);
+ SLAB_TYPESAFE_BY_RCU | SLAB_HWCACHE_ALIGN, NULL);
if (!nf_conntrack_cachep)
goto err_cachep;
.unhash = smc_unhash_sk,
.obj_size = sizeof(struct smc_sock),
.h.smc_hash = &smc_v4_hashinfo,
- .slab_flags = SLAB_DESTROY_BY_RCU,
+ .slab_flags = SLAB_TYPESAFE_BY_RCU,
};
EXPORT_SYMBOL_GPL(smc_proto);