bpf: restore behavior of bpf_map_update_elem

author Alexei Starovoitov <ast@fb.com>

Fri, 5 Aug 2016 21:01:27 +0000 (14:01 -0700)

committer David S. Miller <davem@davemloft.net>

Sun, 7 Aug 2016 00:49:19 +0000 (20:49 -0400)
author Alexei Starovoitov <ast@fb.com>
Fri, 5 Aug 2016 21:01:27 +0000 (14:01 -0700)
committer David S. Miller <davem@davemloft.net>
Sun, 7 Aug 2016 00:49:19 +0000 (20:49 -0400)
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c

index fff3650d52fc774f7efdf2d289e879a916f44c60..570eeca7bdfa79ce16d18ee94fac64c40aa11d38 100644 (file)
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -26,11 +26,18 @@ struct bpf_htab {
         struct bucket *buckets;
         void *elems;
         struct pcpu_freelist freelist;
+       void __percpu *extra_elems;
         atomic_t count; /* number of elements in this hashtable */
         u32 n_buckets;  /* number of hash buckets */
         u32 elem_size;  /* size of each element in bytes */
  };
  
+enum extra_elem_state {
+       HTAB_NOT_AN_EXTRA_ELEM = 0,
+       HTAB_EXTRA_ELEM_FREE,
+       HTAB_EXTRA_ELEM_USED
+};
+
  /* each htab element is struct htab_elem + key + value */
  struct htab_elem {
         union {
@@ -38,7 +45,10 @@ struct htab_elem {
                 struct bpf_htab *htab;
                 struct pcpu_freelist_node fnode;
         };
-       struct rcu_head rcu;
+       union {
+               struct rcu_head rcu;
+               enum extra_elem_state state;
+       };
         u32 hash;
         char key[0] __aligned(8);
  };
@@ -113,6 +123,23 @@ free_elems:
         return err;
  }
  
+static int alloc_extra_elems(struct bpf_htab *htab)
+{
+       void __percpu *pptr;
+       int cpu;
+
+       pptr = __alloc_percpu_gfp(htab->elem_size, 8, GFP_USER | __GFP_NOWARN);
+       if (!pptr)
+               return -ENOMEM;
+
+       for_each_possible_cpu(cpu) {
+               ((struct htab_elem *)per_cpu_ptr(pptr, cpu))->state =
+                       HTAB_EXTRA_ELEM_FREE;
+       }
+       htab->extra_elems = pptr;
+       return 0;
+}
+
  /* Called from syscall */
  static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
  {
@@ -185,6 +212,8 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
         if (percpu)
                 cost += (u64) round_up(htab->map.value_size, 8) *
                         num_possible_cpus() * htab->map.max_entries;
+       else
+              cost += (u64) htab->elem_size * num_possible_cpus();
  
         if (cost >= U32_MAX - PAGE_SIZE)
                 /* make sure page count doesn't overflow */
@@ -212,14 +241,22 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
                 raw_spin_lock_init(&htab->buckets[i].lock);
         }
  
+       if (!percpu) {
+               err = alloc_extra_elems(htab);
+               if (err)
+                       goto free_buckets;
+       }
+
         if (!(attr->map_flags & BPF_F_NO_PREALLOC)) {
                 err = prealloc_elems_and_freelist(htab);
                 if (err)
-                       goto free_buckets;
+                       goto free_extra_elems;
         }
  
         return &htab->map;
  
+free_extra_elems:
+       free_percpu(htab->extra_elems);
  free_buckets:
         kvfree(htab->buckets);
  free_htab:
@@ -349,7 +386,6 @@ static void htab_elem_free(struct bpf_htab *htab, struct htab_elem *l)
         if (htab->map.map_type == BPF_MAP_TYPE_PERCPU_HASH)
                 free_percpu(htab_elem_get_ptr(l, htab->map.key_size));
         kfree(l);
-
  }
  
  static void htab_elem_free_rcu(struct rcu_head *head)
@@ -370,6 +406,11 @@ static void htab_elem_free_rcu(struct rcu_head *head)
  
  static void free_htab_elem(struct bpf_htab *htab, struct htab_elem *l)
  {
+       if (l->state == HTAB_EXTRA_ELEM_USED) {
+               l->state = HTAB_EXTRA_ELEM_FREE;
+               return;
+       }
+
         if (!(htab->map.map_flags & BPF_F_NO_PREALLOC)) {
                 pcpu_freelist_push(&htab->freelist, &l->fnode);
         } else {
@@ -381,25 +422,44 @@ static void free_htab_elem(struct bpf_htab *htab, struct htab_elem *l)
  
  static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key,
                                          void *value, u32 key_size, u32 hash,
-                                        bool percpu, bool onallcpus)
+                                        bool percpu, bool onallcpus,
+                                        bool old_elem_exists)
  {
         u32 size = htab->map.value_size;
         bool prealloc = !(htab->map.map_flags & BPF_F_NO_PREALLOC);
         struct htab_elem *l_new;
         void __percpu *pptr;
+       int err = 0;
  
         if (prealloc) {
                 l_new = (struct htab_elem *)pcpu_freelist_pop(&htab->freelist);
                 if (!l_new)
-                       return ERR_PTR(-E2BIG);
+                       err = -E2BIG;
         } else {
                 if (atomic_inc_return(&htab->count) > htab->map.max_entries) {
                         atomic_dec(&htab->count);
-                       return ERR_PTR(-E2BIG);
+                       err = -E2BIG;
+               } else {
+                       l_new = kmalloc(htab->elem_size,
+                                       GFP_ATOMIC | __GFP_NOWARN);
+                       if (!l_new)
+                               return ERR_PTR(-ENOMEM);
                 }
-               l_new = kmalloc(htab->elem_size, GFP_ATOMIC | __GFP_NOWARN);
-               if (!l_new)
-                       return ERR_PTR(-ENOMEM);
+       }
+
+       if (err) {
+               if (!old_elem_exists)
+                       return ERR_PTR(err);
+
+               /* if we're updating the existing element and the hash table
+                * is full, use per-cpu extra elems
+                */
+               l_new = this_cpu_ptr(htab->extra_elems);
+               if (l_new->state != HTAB_EXTRA_ELEM_FREE)
+                       return ERR_PTR(-E2BIG);
+               l_new->state = HTAB_EXTRA_ELEM_USED;
+       } else {
+               l_new->state = HTAB_NOT_AN_EXTRA_ELEM;
         }
  
         memcpy(l_new->key, key, key_size);
@@ -489,7 +549,8 @@ static int htab_map_update_elem(struct bpf_map *map, void *key, void *value,
         if (ret)
                 goto err;
  
-       l_new = alloc_htab_elem(htab, key, value, key_size, hash, false, false);
+       l_new = alloc_htab_elem(htab, key, value, key_size, hash, false, false,
+                               !!l_old);
         if (IS_ERR(l_new)) {
                 /* all pre-allocated elements are in use or memory exhausted */
                 ret = PTR_ERR(l_new);
@@ -563,7 +624,7 @@ static int __htab_percpu_map_update_elem(struct bpf_map *map, void *key,
                 }
         } else {
                 l_new = alloc_htab_elem(htab, key, value, key_size,
-                                       hash, true, onallcpus);
+                                       hash, true, onallcpus, false);
                 if (IS_ERR(l_new)) {
                         ret = PTR_ERR(l_new);
                         goto err;
@@ -652,6 +713,7 @@ static void htab_map_free(struct bpf_map *map)
                 htab_free_elems(htab);
                 pcpu_freelist_destroy(&htab->freelist);
         }
+       free_percpu(htab->extra_elems);
         kvfree(htab->buckets);
         kfree(htab);
  }
author	Alexei Starovoitov <ast@fb.com>
	Fri, 5 Aug 2016 21:01:27 +0000 (14:01 -0700)
committer	David S. Miller <davem@davemloft.net>
	Sun, 7 Aug 2016 00:49:19 +0000 (20:49 -0400)