bpf: Implement map_delete_elem for BPF_MAP_TYPE_LPM_TRIE
authorCraig Gallek <kraig@google.com>
Mon, 18 Sep 2017 19:30:55 +0000 (15:30 -0400)
committerDavid S. Miller <davem@davemloft.net>
Tue, 19 Sep 2017 20:55:15 +0000 (13:55 -0700)
This is a simple non-recursive delete operation.  It prunes paths
of empty nodes in the tree, but it does not try to further compress
the tree as nodes are removed.

Signed-off-by: Craig Gallek <kraig@google.com>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
kernel/bpf/lpm_trie.c

index 1b767844a76ff54b1e99c0c191964269a34c85a7..9d58a576b2aef66d83d698b34ba312cf89b0b07d 100644 (file)
@@ -389,10 +389,84 @@ out:
        return ret;
 }
 
-static int trie_delete_elem(struct bpf_map *map, void *key)
+/* Called from syscall or from eBPF program */
+static int trie_delete_elem(struct bpf_map *map, void *_key)
 {
-       /* TODO */
-       return -ENOSYS;
+       struct lpm_trie *trie = container_of(map, struct lpm_trie, map);
+       struct bpf_lpm_trie_key *key = _key;
+       struct lpm_trie_node __rcu **trim;
+       struct lpm_trie_node *node;
+       unsigned long irq_flags;
+       unsigned int next_bit;
+       size_t matchlen = 0;
+       int ret = 0;
+
+       if (key->prefixlen > trie->max_prefixlen)
+               return -EINVAL;
+
+       raw_spin_lock_irqsave(&trie->lock, irq_flags);
+
+       /* Walk the tree looking for an exact key/length match and keeping
+        * track of where we could begin trimming the tree.  The trim-point
+        * is the sub-tree along the walk consisting of only single-child
+        * intermediate nodes and ending at a leaf node that we want to
+        * remove.
+        */
+       trim = &trie->root;
+       node = rcu_dereference_protected(
+               trie->root, lockdep_is_held(&trie->lock));
+       while (node) {
+               matchlen = longest_prefix_match(trie, node, key);
+
+               if (node->prefixlen != matchlen ||
+                   node->prefixlen == key->prefixlen)
+                       break;
+
+               next_bit = extract_bit(key->data, node->prefixlen);
+               /* If we hit a node that has more than one child or is a valid
+                * prefix itself, do not remove it. Reset the root of the trim
+                * path to its descendant on our path.
+                */
+               if (!(node->flags & LPM_TREE_NODE_FLAG_IM) ||
+                   (node->child[0] && node->child[1]))
+                       trim = &node->child[next_bit];
+               node = rcu_dereference_protected(
+                       node->child[next_bit], lockdep_is_held(&trie->lock));
+       }
+
+       if (!node || node->prefixlen != key->prefixlen ||
+           (node->flags & LPM_TREE_NODE_FLAG_IM)) {
+               ret = -ENOENT;
+               goto out;
+       }
+
+       trie->n_entries--;
+
+       /* If the node we are removing is not a leaf node, simply mark it
+        * as intermediate and we are done.
+        */
+       if (rcu_access_pointer(node->child[0]) ||
+           rcu_access_pointer(node->child[1])) {
+               node->flags |= LPM_TREE_NODE_FLAG_IM;
+               goto out;
+       }
+
+       /* trim should now point to the slot holding the start of a path from
+        * zero or more intermediate nodes to our leaf node for deletion.
+        */
+       while ((node = rcu_dereference_protected(
+                       *trim, lockdep_is_held(&trie->lock)))) {
+               RCU_INIT_POINTER(*trim, NULL);
+               trim = rcu_access_pointer(node->child[0]) ?
+                       &node->child[0] :
+                       &node->child[1];
+               kfree_rcu(node, rcu);
+       }
+
+out:
+       raw_spin_unlock_irqrestore(&trie->lock, irq_flags);
+
+       return ret;
 }
 
 #define LPM_DATA_SIZE_MAX      256