nfp: bpf: add simple map op cache
author: Jakub Kicinski <jakub.kicinski@netronome.com>
Wed, 28 Aug 2019 05:36:29 +0000 (22:36 -0700)
committer: Daniel Borkmann <daniel@iogearbox.net>
Fri, 30 Aug 2019 22:49:05 +0000 (00:49 +0200)
Each get_next and lookup call requires a round trip to the device.
However, the device is capable of giving us a few entries back,
instead of just one.

In this patch we ask for a small yet reasonable number of entries
(4) on every get_next call, and on subsequent get_next/lookup calls
check this little cache for a hit. The cache is only kept for 250us,
and is invalidated on every operation which may modify the map
(e.g. delete or update call). Note that operations may be performed
simultaneously, so we have to keep track of operations in flight.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Reviewed-by: Quentin Monnet <quentin.monnet@netronome.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
drivers/net/ethernet/netronome/nfp/bpf/cmsg.c
drivers/net/ethernet/netronome/nfp/bpf/fw.h
drivers/net/ethernet/netronome/nfp/bpf/main.c
drivers/net/ethernet/netronome/nfp/bpf/main.h
drivers/net/ethernet/netronome/nfp/bpf/offload.c

index fcf880c82f3f1119cfc2872f2a0cc77711a44d3b..0e2db6ea79e96f7e9daca0fdcd84ce7b4483b0ed 100644 (file)
@@ -6,6 +6,7 @@
 #include <linux/bug.h>
 #include <linux/jiffies.h>
 #include <linux/skbuff.h>
+#include <linux/timekeeping.h>
 
 #include "../ccm.h"
 #include "../nfp_app.h"
@@ -175,29 +176,151 @@ nfp_bpf_ctrl_reply_val(struct nfp_app_bpf *bpf, struct cmsg_reply_map_op *reply,
        return &reply->data[bpf->cmsg_key_sz * (n + 1) + bpf->cmsg_val_sz * n];
 }
 
+static bool nfp_bpf_ctrl_op_cache_invalidate(enum nfp_ccm_type op)
+{
+       return op == NFP_CCM_TYPE_BPF_MAP_UPDATE ||
+              op == NFP_CCM_TYPE_BPF_MAP_DELETE;
+}
+
+static bool nfp_bpf_ctrl_op_cache_capable(enum nfp_ccm_type op)
+{
+       return op == NFP_CCM_TYPE_BPF_MAP_LOOKUP ||
+              op == NFP_CCM_TYPE_BPF_MAP_GETNEXT;
+}
+
+static bool nfp_bpf_ctrl_op_cache_fill(enum nfp_ccm_type op)
+{
+       return op == NFP_CCM_TYPE_BPF_MAP_GETFIRST ||
+              op == NFP_CCM_TYPE_BPF_MAP_GETNEXT;
+}
+
+static unsigned int
+nfp_bpf_ctrl_op_cache_get(struct nfp_bpf_map *nfp_map, enum nfp_ccm_type op,
+                         const u8 *key, u8 *out_key, u8 *out_value,
+                         u32 *cache_gen)
+{
+       struct bpf_map *map = &nfp_map->offmap->map;
+       struct nfp_app_bpf *bpf = nfp_map->bpf;
+       unsigned int i, count, n_entries;
+       struct cmsg_reply_map_op *reply;
+
+       n_entries = nfp_bpf_ctrl_op_cache_fill(op) ? bpf->cmsg_cache_cnt : 1;
+
+       spin_lock(&nfp_map->cache_lock);
+       *cache_gen = nfp_map->cache_gen;
+       if (nfp_map->cache_blockers)
+               n_entries = 1;
+
+       if (nfp_bpf_ctrl_op_cache_invalidate(op))
+               goto exit_block;
+       if (!nfp_bpf_ctrl_op_cache_capable(op))
+               goto exit_unlock;
+
+       if (!nfp_map->cache)
+               goto exit_unlock;
+       if (nfp_map->cache_to < ktime_get_ns())
+               goto exit_invalidate;
+
+       reply = (void *)nfp_map->cache->data;
+       count = be32_to_cpu(reply->count);
+
+       for (i = 0; i < count; i++) {
+               void *cached_key;
+
+               cached_key = nfp_bpf_ctrl_reply_key(bpf, reply, i);
+               if (memcmp(cached_key, key, map->key_size))
+                       continue;
+
+               if (op == NFP_CCM_TYPE_BPF_MAP_LOOKUP)
+                       memcpy(out_value, nfp_bpf_ctrl_reply_val(bpf, reply, i),
+                              map->value_size);
+               if (op == NFP_CCM_TYPE_BPF_MAP_GETNEXT) {
+                       if (i + 1 == count)
+                               break;
+
+                       memcpy(out_key,
+                              nfp_bpf_ctrl_reply_key(bpf, reply, i + 1),
+                              map->key_size);
+               }
+
+               n_entries = 0;
+               goto exit_unlock;
+       }
+       goto exit_unlock;
+
+exit_block:
+       nfp_map->cache_blockers++;
+exit_invalidate:
+       dev_consume_skb_any(nfp_map->cache);
+       nfp_map->cache = NULL;
+exit_unlock:
+       spin_unlock(&nfp_map->cache_lock);
+       return n_entries;
+}
+
+static void
+nfp_bpf_ctrl_op_cache_put(struct nfp_bpf_map *nfp_map, enum nfp_ccm_type op,
+                         struct sk_buff *skb, u32 cache_gen)
+{
+       bool blocker, filler;
+
+       blocker = nfp_bpf_ctrl_op_cache_invalidate(op);
+       filler = nfp_bpf_ctrl_op_cache_fill(op);
+       if (blocker || filler) {
+               u64 to = 0;
+
+               if (filler)
+                       to = ktime_get_ns() + NFP_BPF_MAP_CACHE_TIME_NS;
+
+               spin_lock(&nfp_map->cache_lock);
+               if (blocker) {
+                       nfp_map->cache_blockers--;
+                       nfp_map->cache_gen++;
+               }
+               if (filler && !nfp_map->cache_blockers &&
+                   nfp_map->cache_gen == cache_gen) {
+                       nfp_map->cache_to = to;
+                       swap(nfp_map->cache, skb);
+               }
+               spin_unlock(&nfp_map->cache_lock);
+       }
+
+       dev_consume_skb_any(skb);
+}
+
 static int
 nfp_bpf_ctrl_entry_op(struct bpf_offloaded_map *offmap, enum nfp_ccm_type op,
                      u8 *key, u8 *value, u64 flags, u8 *out_key, u8 *out_value)
 {
        struct nfp_bpf_map *nfp_map = offmap->dev_priv;
+       unsigned int n_entries, reply_entries, count;
        struct nfp_app_bpf *bpf = nfp_map->bpf;
        struct bpf_map *map = &offmap->map;
        struct cmsg_reply_map_op *reply;
        struct cmsg_req_map_op *req;
        struct sk_buff *skb;
+       u32 cache_gen;
        int err;
 
        /* FW messages have no space for more than 32 bits of flags */
        if (flags >> 32)
                return -EOPNOTSUPP;
 
+       /* Handle op cache */
+       n_entries = nfp_bpf_ctrl_op_cache_get(nfp_map, op, key, out_key,
+                                             out_value, &cache_gen);
+       if (!n_entries)
+               return 0;
+
        skb = nfp_bpf_cmsg_map_req_alloc(bpf, 1);
-       if (!skb)
-               return -ENOMEM;
+       if (!skb) {
+               err = -ENOMEM;
+               goto err_cache_put;
+       }
 
        req = (void *)skb->data;
        req->tid = cpu_to_be32(nfp_map->tid);
-       req->count = cpu_to_be32(1);
+       req->count = cpu_to_be32(n_entries);
        req->flags = cpu_to_be32(flags);
 
        /* Copy inputs */
@@ -207,16 +330,38 @@ nfp_bpf_ctrl_entry_op(struct bpf_offloaded_map *offmap, enum nfp_ccm_type op,
                memcpy(nfp_bpf_ctrl_req_val(bpf, req, 0), value,
                       map->value_size);
 
-       skb = nfp_ccm_communicate(&bpf->ccm, skb, op,
-                                 nfp_bpf_cmsg_map_reply_size(bpf, 1));
-       if (IS_ERR(skb))
-               return PTR_ERR(skb);
+       skb = nfp_ccm_communicate(&bpf->ccm, skb, op, 0);
+       if (IS_ERR(skb)) {
+               err = PTR_ERR(skb);
+               goto err_cache_put;
+       }
+
+       if (skb->len < sizeof(*reply)) {
+               cmsg_warn(bpf, "cmsg drop - type 0x%02x too short %d!\n",
+                         op, skb->len);
+               err = -EIO;
+               goto err_free;
+       }
 
        reply = (void *)skb->data;
+       count = be32_to_cpu(reply->count);
        err = nfp_bpf_ctrl_rc_to_errno(bpf, &reply->reply_hdr);
+       /* FW responds with message sized to hold the good entries,
+        * plus one extra entry if there was an error.
+        */
+       reply_entries = count + !!err;
+       if (n_entries > 1 && count)
+               err = 0;
        if (err)
                goto err_free;
 
+       if (skb->len != nfp_bpf_cmsg_map_reply_size(bpf, reply_entries)) {
+               cmsg_warn(bpf, "cmsg drop - type 0x%02x too short %d for %d entries!\n",
+                         op, skb->len, reply_entries);
+               err = -EIO;
+               goto err_free;
+       }
+
        /* Copy outputs */
        if (out_key)
                memcpy(out_key, nfp_bpf_ctrl_reply_key(bpf, reply, 0),
@@ -225,11 +370,13 @@ nfp_bpf_ctrl_entry_op(struct bpf_offloaded_map *offmap, enum nfp_ccm_type op,
                memcpy(out_value, nfp_bpf_ctrl_reply_val(bpf, reply, 0),
                       map->value_size);
 
-       dev_consume_skb_any(skb);
+       nfp_bpf_ctrl_op_cache_put(nfp_map, op, skb, cache_gen);
 
        return 0;
 err_free:
        dev_kfree_skb_any(skb);
+err_cache_put:
+       nfp_bpf_ctrl_op_cache_put(nfp_map, op, NULL, cache_gen);
        return err;
 }
 
@@ -275,7 +422,21 @@ unsigned int nfp_bpf_ctrl_cmsg_min_mtu(struct nfp_app_bpf *bpf)
 
 unsigned int nfp_bpf_ctrl_cmsg_mtu(struct nfp_app_bpf *bpf)
 {
-       return max(NFP_NET_DEFAULT_MTU, nfp_bpf_ctrl_cmsg_min_mtu(bpf));
+       return max3(NFP_NET_DEFAULT_MTU,
+                   nfp_bpf_cmsg_map_req_size(bpf, NFP_BPF_MAP_CACHE_CNT),
+                   nfp_bpf_cmsg_map_reply_size(bpf, NFP_BPF_MAP_CACHE_CNT));
+}
+
+unsigned int nfp_bpf_ctrl_cmsg_cache_cnt(struct nfp_app_bpf *bpf)
+{
+       unsigned int mtu, req_max, reply_max, entry_sz;
+
+       mtu = bpf->app->ctrl->dp.mtu;
+       entry_sz = bpf->cmsg_key_sz + bpf->cmsg_val_sz;
+       req_max = (mtu - sizeof(struct cmsg_req_map_op)) / entry_sz;
+       reply_max = (mtu - sizeof(struct cmsg_reply_map_op)) / entry_sz;
+
+       return min3(req_max, reply_max, NFP_BPF_MAP_CACHE_CNT);
 }
 
 void nfp_bpf_ctrl_msg_rx(struct nfp_app *app, struct sk_buff *skb)
index 06c4286bd79e0a0b3578da221783a20d54d8a7f9..a83a0ad5e27de0c61f9748299e78f040b3ca7289 100644 (file)
@@ -24,6 +24,7 @@ enum bpf_cap_tlv_type {
        NFP_BPF_CAP_TYPE_QUEUE_SELECT   = 5,
        NFP_BPF_CAP_TYPE_ADJUST_TAIL    = 6,
        NFP_BPF_CAP_TYPE_ABI_VERSION    = 7,
+       NFP_BPF_CAP_TYPE_CMSG_MULTI_ENT = 8,
 };
 
 struct nfp_bpf_cap_tlv_func {
index 2b1773ed3de9112c97eae17f7dc6f0bcbb661886..8f732771d3fad8965318dca81a5e292a5310d8e6 100644 (file)
@@ -299,6 +299,14 @@ nfp_bpf_parse_cap_adjust_tail(struct nfp_app_bpf *bpf, void __iomem *value,
        return 0;
 }
 
+static int
+nfp_bpf_parse_cap_cmsg_multi_ent(struct nfp_app_bpf *bpf, void __iomem *value,
+                                u32 length)
+{
+       bpf->cmsg_multi_ent = true;
+       return 0;
+}
+
 static int
 nfp_bpf_parse_cap_abi_version(struct nfp_app_bpf *bpf, void __iomem *value,
                              u32 length)
@@ -375,6 +383,11 @@ static int nfp_bpf_parse_capabilities(struct nfp_app *app)
                                                          length))
                                goto err_release_free;
                        break;
+               case NFP_BPF_CAP_TYPE_CMSG_MULTI_ENT:
+                       if (nfp_bpf_parse_cap_cmsg_multi_ent(app->priv, value,
+                                                            length))
+                               goto err_release_free;
+                       break;
                default:
                        nfp_dbg(cpp, "unknown BPF capability: %d\n", type);
                        break;
@@ -426,6 +439,11 @@ static int nfp_bpf_start(struct nfp_app *app)
                return -EINVAL;
        }
 
+       if (bpf->cmsg_multi_ent)
+               bpf->cmsg_cache_cnt = nfp_bpf_ctrl_cmsg_cache_cnt(bpf);
+       else
+               bpf->cmsg_cache_cnt = 1;
+
        return 0;
 }
 
index f4802036eb42a49e52c509f49f2c40519cba27b4..fac9c6f9e197b44882309b08522187ee0f5eba74 100644 (file)
@@ -99,6 +99,7 @@ enum pkt_vec {
  * @maps_neutral:      hash table of offload-neutral maps (on pointer)
  *
  * @abi_version:       global BPF ABI version
+ * @cmsg_cache_cnt:    number of entries to read for caching
  *
  * @adjust_head:       adjust head capability
  * @adjust_head.flags:         extra flags for adjust head
@@ -124,6 +125,7 @@ enum pkt_vec {
  * @pseudo_random:     FW initialized the pseudo-random machinery (CSRs)
  * @queue_select:      BPF can set the RX queue ID in packet vector
  * @adjust_tail:       BPF can simply trunc packet size for adjust tail
+ * @cmsg_multi_ent:    FW can pack multiple map entries in a single cmsg
  */
 struct nfp_app_bpf {
        struct nfp_app *app;
@@ -134,6 +136,8 @@ struct nfp_app_bpf {
        unsigned int cmsg_key_sz;
        unsigned int cmsg_val_sz;
 
+       unsigned int cmsg_cache_cnt;
+
        struct list_head map_list;
        unsigned int maps_in_use;
        unsigned int map_elems_in_use;
@@ -169,6 +173,7 @@ struct nfp_app_bpf {
        bool pseudo_random;
        bool queue_select;
        bool adjust_tail;
+       bool cmsg_multi_ent;
 };
 
 enum nfp_bpf_map_use {
@@ -183,11 +188,21 @@ struct nfp_bpf_map_word {
        unsigned char non_zero_update   :1;
 };
 
+#define NFP_BPF_MAP_CACHE_CNT          4U
+#define NFP_BPF_MAP_CACHE_TIME_NS      (250 * 1000)
+
 /**
  * struct nfp_bpf_map - private per-map data attached to BPF maps for offload
  * @offmap:    pointer to the offloaded BPF map
  * @bpf:       back pointer to bpf app private structure
  * @tid:       table id identifying map on datapath
+ *
+ * @cache_lock:        protects @cache_blockers, @cache_gen, @cache_to, @cache
+ * @cache_blockers:    number of ops in flight which block caching
+ * @cache_gen: counter incremented by every blocker on exit
+ * @cache_to:  time when cache will no longer be valid (ns)
+ * @cache:     skb with cached response
+ *
  * @l:         link on the nfp_app_bpf->map_list list
  * @use_map:   map of how the value is used (in 4B chunks)
  */
@@ -195,6 +210,13 @@ struct nfp_bpf_map {
        struct bpf_offloaded_map *offmap;
        struct nfp_app_bpf *bpf;
        u32 tid;
+
+       spinlock_t cache_lock;
+       u32 cache_blockers;
+       u32 cache_gen;
+       u64 cache_to;
+       struct sk_buff *cache;
+
        struct list_head l;
        struct nfp_bpf_map_word use_map[];
 };
@@ -566,6 +588,7 @@ void *nfp_bpf_relo_for_vnic(struct nfp_prog *nfp_prog, struct nfp_bpf_vnic *bv);
 
 unsigned int nfp_bpf_ctrl_cmsg_min_mtu(struct nfp_app_bpf *bpf);
 unsigned int nfp_bpf_ctrl_cmsg_mtu(struct nfp_app_bpf *bpf);
+unsigned int nfp_bpf_ctrl_cmsg_cache_cnt(struct nfp_app_bpf *bpf);
 long long int
 nfp_bpf_ctrl_alloc_map(struct nfp_app_bpf *bpf, struct bpf_map *map);
 void
index 39c9fec222b45823eca3772a77b239938188fcd2..88fab6a82acff88716dc70b20ca3b295b257e6c9 100644 (file)
@@ -385,6 +385,7 @@ nfp_bpf_map_alloc(struct nfp_app_bpf *bpf, struct bpf_offloaded_map *offmap)
        offmap->dev_priv = nfp_map;
        nfp_map->offmap = offmap;
        nfp_map->bpf = bpf;
+       spin_lock_init(&nfp_map->cache_lock);
 
        res = nfp_bpf_ctrl_alloc_map(bpf, &offmap->map);
        if (res < 0) {
@@ -407,6 +408,8 @@ nfp_bpf_map_free(struct nfp_app_bpf *bpf, struct bpf_offloaded_map *offmap)
        struct nfp_bpf_map *nfp_map = offmap->dev_priv;
 
        nfp_bpf_ctrl_free_map(bpf, nfp_map);
+       dev_consume_skb_any(nfp_map->cache);
+       WARN_ON_ONCE(nfp_map->cache_blockers);
        list_del_init(&nfp_map->l);
        bpf->map_elems_in_use -= offmap->map.max_entries;
        bpf->maps_in_use--;