nfp: nsp: allow the use of DMA buffer
author Jakub Kicinski <jakub.kicinski@netronome.com>
Wed, 27 Feb 2019 04:19:32 +0000 (20:19 -0800)
committer David S. Miller <davem@davemloft.net>
Fri, 1 Mar 2019 19:36:01 +0000 (11:36 -0800)
Newer versions of NSP can access host memory.  The simplest access
type requires all data to be in one contiguous area.  Since we
have no guarantee about where callers of the NSP ABI will
allocate their buffers, we allocate a bounce buffer and copy
the data in and out.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Reviewed-by: Dirk van der Merwe <dirk.vandermerwe@netronome.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.c

index 28262b0fc89a18d07d82b3ef9b252781bbd6d6b9..dd6256841a37feab4cb1b5ed58a76dc32739f58f 100644 (file)
@@ -13,6 +13,7 @@
 #include <linux/firmware.h>
 #include <linux/kernel.h>
 #include <linux/kthread.h>
+#include <linux/overflow.h>
 #include <linux/sizes.h>
 #include <linux/slab.h>
 
@@ -37,6 +38,7 @@
 #define NSP_COMMAND            0x08
 #define   NSP_COMMAND_OPTION   GENMASK_ULL(63, 32)
 #define   NSP_COMMAND_CODE     GENMASK_ULL(31, 16)
+#define   NSP_COMMAND_DMA_BUF  BIT_ULL(1)
 #define   NSP_COMMAND_START    BIT_ULL(0)
 
 /* CPP address to retrieve the data from */
 #define   NSP_DFLT_BUFFER_ADDRESS      GENMASK_ULL(39, 0)
 
 #define NSP_DFLT_BUFFER_CONFIG 0x20
+#define   NSP_DFLT_BUFFER_DMA_CHUNK_ORDER      GENMASK_ULL(63, 58)
 #define   NSP_DFLT_BUFFER_SIZE_4KB     GENMASK_ULL(15, 8)
 #define   NSP_DFLT_BUFFER_SIZE_MB      GENMASK_ULL(7, 0)
 
+#define NFP_CAP_CMD_DMA_SG     0x28
+
 #define NSP_MAGIC              0xab10
 #define NSP_MAJOR              0
 #define NSP_MINOR              8
@@ -92,6 +97,16 @@ enum nfp_nsp_cmd {
        SPCODE_VERSIONS         = 21, /* Report FW versions */
 };
 
+struct nfp_nsp_dma_buf {
+       __le32 chunk_cnt;
+       __le32 reserved[3];
+       struct {
+               __le32 size;
+               __le32 reserved;
+               __le64 addr;
+       } descs[];
+};
+
 static const struct {
        int code;
        const char *msg;
@@ -120,6 +135,7 @@ struct nfp_nsp {
 /**
  * struct nfp_nsp_command_arg - NFP command argument structure
  * @code:      NFP SP Command Code
+ * @dma:       @buf points to a host buffer, not NSP buffer
  * @timeout_sec:Timeout value to wait for completion in seconds
  * @option:    NFP SP Command Argument
  * @buf:       NFP SP Buffer Address
@@ -127,6 +143,7 @@ struct nfp_nsp {
  */
 struct nfp_nsp_command_arg {
        u16 code;
+       bool dma;
        unsigned int timeout_sec;
        u32 option;
        u64 buf;
@@ -350,6 +367,7 @@ __nfp_nsp_command(struct nfp_nsp *state, const struct nfp_nsp_command_arg *arg)
        err = nfp_cpp_writeq(cpp, nsp_cpp, nsp_command,
                             FIELD_PREP(NSP_COMMAND_OPTION, arg->option) |
                             FIELD_PREP(NSP_COMMAND_CODE, arg->code) |
+                            FIELD_PREP(NSP_COMMAND_DMA_BUF, arg->dma) |
                             FIELD_PREP(NSP_COMMAND_START, 1));
        if (err < 0)
                return err;
@@ -456,10 +474,174 @@ nfp_nsp_command_buf_def(struct nfp_nsp *nsp,
        return ret;
 }
 
+static int
+nfp_nsp_command_buf_dma_sg(struct nfp_nsp *nsp,
+                          struct nfp_nsp_command_buf_arg *arg,
+                          unsigned int max_size, unsigned int chunk_order,
+                          unsigned int dma_order)
+{
+       struct nfp_cpp *cpp = nsp->cpp;
+       struct nfp_nsp_dma_buf *desc;
+       struct {
+               dma_addr_t dma_addr;
+               unsigned long len;
+               void *chunk;
+       } *chunks;
+       size_t chunk_size, dma_size;
+       dma_addr_t dma_desc;
+       struct device *dev;
+       unsigned long off;
+       int i, ret, nseg;
+       size_t desc_sz;
+
+       chunk_size = BIT_ULL(chunk_order);
+       dma_size = BIT_ULL(dma_order);
+       nseg = DIV_ROUND_UP(max_size, chunk_size);
+
+       chunks = kzalloc(array_size(sizeof(*chunks), nseg), GFP_KERNEL);
+       if (!chunks)
+               return -ENOMEM;
+
+       off = 0;
+       ret = -ENOMEM;
+       for (i = 0; i < nseg; i++) {
+               unsigned long coff;
+
+               chunks[i].chunk = kmalloc(chunk_size,
+                                         GFP_KERNEL | __GFP_NOWARN);
+               if (!chunks[i].chunk)
+                       goto exit_free_prev;
+
+               chunks[i].len = min_t(u64, chunk_size, max_size - off);
+
+               coff = 0;
+               if (arg->in_size > off) {
+                       coff = min_t(u64, arg->in_size - off, chunk_size);
+                       memcpy(chunks[i].chunk, arg->in_buf + off, coff);
+               }
+               memset(chunks[i].chunk + coff, 0, chunk_size - coff);
+
+               off += chunks[i].len;
+       }
+
+       dev = nfp_cpp_device(cpp)->parent;
+
+       for (i = 0; i < nseg; i++) {
+               dma_addr_t addr;
+
+               addr = dma_map_single(dev, chunks[i].chunk, chunks[i].len,
+                                     DMA_BIDIRECTIONAL);
+               chunks[i].dma_addr = addr;
+
+               ret = dma_mapping_error(dev, addr);
+               if (ret)
+                       goto exit_unmap_prev;
+
+               if (WARN_ONCE(round_down(addr, dma_size) !=
+                             round_down(addr + chunks[i].len - 1, dma_size),
+                             "unaligned DMA address: %pad %lu %zd\n",
+                             &addr, chunks[i].len, dma_size)) {
+                       ret = -EFAULT;
+                       i++;
+                       goto exit_unmap_prev;
+               }
+       }
+
+       desc_sz = struct_size(desc, descs, nseg);
+       desc = kmalloc(desc_sz, GFP_KERNEL);
+       if (!desc) {
+               ret = -ENOMEM;
+               goto exit_unmap_all;
+       }
+
+       desc->chunk_cnt = cpu_to_le32(nseg);
+       for (i = 0; i < nseg; i++) {
+               desc->descs[i].size = cpu_to_le32(chunks[i].len);
+               desc->descs[i].addr = cpu_to_le64(chunks[i].dma_addr);
+       }
+
+       dma_desc = dma_map_single(dev, desc, desc_sz, DMA_TO_DEVICE);
+       ret = dma_mapping_error(dev, dma_desc);
+       if (ret)
+               goto exit_free_desc;
+
+       arg->arg.dma = true;
+       arg->arg.buf = dma_desc;
+       ret = __nfp_nsp_command(nsp, &arg->arg);
+       if (ret < 0)
+               goto exit_unmap_desc;
+
+       i = 0;
+       off = 0;
+       while (off < arg->out_size) {
+               unsigned int len;
+
+               len = min_t(u64, chunks[i].len, arg->out_size - off);
+               memcpy(arg->out_buf + off, chunks[i].chunk, len);
+               off += len;
+               i++;
+       }
+
+exit_unmap_desc:
+       dma_unmap_single(dev, dma_desc, desc_sz, DMA_TO_DEVICE);
+exit_free_desc:
+       kfree(desc);
+exit_unmap_all:
+       i = nseg;
+exit_unmap_prev:
+       while (--i >= 0)
+               dma_unmap_single(dev, chunks[i].dma_addr, chunks[i].len,
+                                DMA_BIDIRECTIONAL);
+       i = nseg;
+exit_free_prev:
+       while (--i >= 0)
+               kfree(chunks[i].chunk);
+       kfree(chunks);
+       if (ret < 0)
+               nfp_err(cpp, "NSP: SG DMA failed for command 0x%04x: %d (sz:%d cord:%d)\n",
+                       arg->arg.code, ret, max_size, chunk_order);
+       return ret;
+}
+
+static int
+nfp_nsp_command_buf_dma(struct nfp_nsp *nsp,
+                       struct nfp_nsp_command_buf_arg *arg,
+                       unsigned int max_size, unsigned int dma_order)
+{
+       unsigned int chunk_order, buf_order;
+       struct nfp_cpp *cpp = nsp->cpp;
+       bool sg_ok;
+       u64 reg;
+       int err;
+
+       buf_order = order_base_2(roundup_pow_of_two(max_size));
+
+       err = nfp_cpp_readq(cpp, nfp_resource_cpp_id(nsp->res),
+                           nfp_resource_address(nsp->res) + NFP_CAP_CMD_DMA_SG,
+                           &reg);
+       if (err < 0)
+               return err;
+       sg_ok = reg & BIT_ULL(arg->arg.code - 1);
+
+       if (!sg_ok) {
+               if (buf_order > dma_order) {
+                       nfp_err(cpp, "NSP: can't service non-SG DMA for command 0x%04x\n",
+                               arg->arg.code);
+                       return -ENOMEM;
+               }
+               chunk_order = buf_order;
+       } else {
+               chunk_order = min_t(unsigned int, dma_order, PAGE_SHIFT);
+       }
+
+       return nfp_nsp_command_buf_dma_sg(nsp, arg, max_size, chunk_order,
+                                         dma_order);
+}
+
 static int
 nfp_nsp_command_buf(struct nfp_nsp *nsp, struct nfp_nsp_command_buf_arg *arg)
 {
-       unsigned int def_size, max_size;
+       unsigned int dma_order, def_size, max_size;
        struct nfp_cpp *cpp = nsp->cpp;
        u64 reg;
        int err;
@@ -484,12 +666,16 @@ nfp_nsp_command_buf(struct nfp_nsp *nsp, struct nfp_nsp_command_buf_arg *arg)
        max_size = max(arg->in_size, arg->out_size);
        def_size = FIELD_GET(NSP_DFLT_BUFFER_SIZE_MB, reg) * SZ_1M +
                   FIELD_GET(NSP_DFLT_BUFFER_SIZE_4KB, reg) * SZ_4K;
-       if (def_size >= max_size)
+       dma_order = FIELD_GET(NSP_DFLT_BUFFER_DMA_CHUNK_ORDER, reg);
+       if (def_size >= max_size) {
                return nfp_nsp_command_buf_def(nsp, arg);
+       } else if (!dma_order) {
+               nfp_err(cpp, "NSP: default buffer too small for command 0x%04x (%u < %u)\n",
+                       arg->arg.code, def_size, max_size);
+               return -EINVAL;
+       }
 
-       nfp_err(cpp, "NSP: default buffer too small for command 0x%04x (%u < %u)\n",
-               arg->arg.code, def_size, max_size);
-       return -EINVAL;
+       return nfp_nsp_command_buf_dma(nsp, arg, max_size, dma_order);
 }
 
 int nfp_nsp_wait(struct nfp_nsp *state)