emit_ld_field_any(nfp_prog, dst, mask, src, sc, offset * 8, true);
}
+static void
+addr40_offset(struct nfp_prog *nfp_prog, u8 src_gpr, swreg offset,
+ swreg *rega, swreg *regb)
+{
+ if (offset == reg_imm(0)) {
+ *rega = reg_a(src_gpr);
+ *regb = reg_b(src_gpr + 1);
+ return;
+ }
+
+ emit_alu(nfp_prog, imm_a(nfp_prog), reg_a(src_gpr), ALU_OP_ADD, offset);
+ emit_alu(nfp_prog, imm_b(nfp_prog), reg_b(src_gpr + 1), ALU_OP_ADD_C,
+ reg_imm(0));
+ *rega = imm_a(nfp_prog);
+ *regb = imm_b(nfp_prog);
+}
+
/* NFP has Command Push Pull bus which supports bluk memory operations. */
static int nfp_cpp_memcpy(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
bool descending_seq = meta->ldst_gather_len < 0;
s16 len = abs(meta->ldst_gather_len);
swreg src_base, off;
+ bool src_40bit_addr;
unsigned int i;
u8 xfer_num;
off = re_load_imm_any(nfp_prog, meta->insn.off, imm_b(nfp_prog));
+ src_40bit_addr = meta->ptr.type == PTR_TO_MAP_VALUE;
src_base = reg_a(meta->insn.src_reg * 2);
xfer_num = round_up(len, 4) / 4;
+ if (src_40bit_addr)
+ addr40_offset(nfp_prog, meta->insn.src_reg, off, &src_base,
+ &off);
+
/* Setup PREV_ALU fields to override memory read length. */
if (len > 32)
wrp_immed(nfp_prog, reg_none(),
CMD_OVE_LEN | FIELD_PREP(CMD_OV_LEN, xfer_num - 1));
/* Memory read from source addr into transfer-in registers. */
- emit_cmd_any(nfp_prog, CMD_TGT_READ32_SWAP, CMD_MODE_32b, 0, src_base,
- off, xfer_num - 1, true, len > 32);
+ emit_cmd_any(nfp_prog, CMD_TGT_READ32_SWAP,
+ src_40bit_addr ? CMD_MODE_40b_BA : CMD_MODE_32b, 0,
+ src_base, off, xfer_num - 1, true, len > 32);
/* Move from transfer-in to transfer-out. */
for (i = 0; i < xfer_num; i++)
}
static int
-data_ld_host_order(struct nfp_prog *nfp_prog, u8 src_gpr, swreg offset,
- u8 dst_gpr, int size)
+data_ld_host_order(struct nfp_prog *nfp_prog, u8 dst_gpr,
+ swreg lreg, swreg rreg, int size, enum cmd_mode mode)
{
unsigned int i;
u8 mask, sz;
- /* We load the value from the address indicated in @offset and then
+ /* We load the value from the address indicated in rreg + lreg and then
* mask out the data we don't need. Note: this is little endian!
*/
sz = max(size, 4);
mask = size < 4 ? GENMASK(size - 1, 0) : 0;
- emit_cmd(nfp_prog, CMD_TGT_READ32_SWAP, CMD_MODE_32b, 0,
- reg_a(src_gpr), offset, sz / 4 - 1, true);
+ emit_cmd(nfp_prog, CMD_TGT_READ32_SWAP, mode, 0,
+ lreg, rreg, sz / 4 - 1, true);
i = 0;
if (mask)
return 0;
}
+static int
+data_ld_host_order_addr32(struct nfp_prog *nfp_prog, u8 src_gpr, swreg offset,
+ u8 dst_gpr, u8 size)
+{
+ return data_ld_host_order(nfp_prog, dst_gpr, reg_a(src_gpr), offset,
+ size, CMD_MODE_32b);
+}
+
+static int
+data_ld_host_order_addr40(struct nfp_prog *nfp_prog, u8 src_gpr, swreg offset,
+ u8 dst_gpr, u8 size)
+{
+ swreg rega, regb;
+
+ addr40_offset(nfp_prog, src_gpr, offset, ®a, ®b);
+
+ return data_ld_host_order(nfp_prog, dst_gpr, rega, regb,
+ size, CMD_MODE_40b_BA);
+}
+
static int
construct_data_ind_ld(struct nfp_prog *nfp_prog, u16 offset, u16 src, u8 size)
{
tmp_reg = re_load_imm_any(nfp_prog, meta->insn.off, imm_b(nfp_prog));
- return data_ld_host_order(nfp_prog, meta->insn.src_reg * 2, tmp_reg,
- meta->insn.dst_reg * 2, size);
+ return data_ld_host_order_addr32(nfp_prog, meta->insn.src_reg * 2,
+ tmp_reg, meta->insn.dst_reg * 2, size);
+}
+
+static int
+mem_ldx_emem(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
+ unsigned int size)
+{
+ swreg tmp_reg;
+
+ tmp_reg = re_load_imm_any(nfp_prog, meta->insn.off, imm_b(nfp_prog));
+
+ return data_ld_host_order_addr40(nfp_prog, meta->insn.src_reg * 2,
+ tmp_reg, meta->insn.dst_reg * 2, size);
}
static int
return mem_ldx_stack(nfp_prog, meta, size,
meta->ptr.off + meta->ptr.var_off.value);
+ if (meta->ptr.type == PTR_TO_MAP_VALUE)
+ return mem_ldx_emem(nfp_prog, meta, size);
+
return -EOPNOTSUPP;
}