nfp: bpf: migrate to advanced reciprocal divide in reciprocal_div.h
authorJiong Wang <jiong.wang@netronome.com>
Fri, 6 Jul 2018 22:13:23 +0000 (15:13 -0700)
committerDaniel Borkmann <daniel@iogearbox.net>
Fri, 6 Jul 2018 23:45:31 +0000 (01:45 +0200)
As we are doing JIT, we would want to use the advanced version of the
reciprocal divide (reciprocal_value_adv) to trade performance with host.

We could reduce the required ALU instructions from 4 to 2 or 1.

Signed-off-by: Jiong Wang <jiong.wang@netronome.com>
Reviewed-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
drivers/net/ethernet/netronome/nfp/bpf/jit.c

index 7c9ee3d85907713546ff423c4c96c4400d58e69c..1d9e3683540439da2ee7d6e57fec4080542ed7ba 100644 (file)
@@ -1497,8 +1497,8 @@ wrp_mul(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
 static int wrp_div_imm(struct nfp_prog *nfp_prog, u8 dst, u64 imm)
 {
        swreg dst_both = reg_both(dst), dst_a = reg_a(dst), dst_b = reg_a(dst);
-       struct reciprocal_value rvalue;
-       swreg tmp_b = imm_b(nfp_prog);
+       struct reciprocal_value_adv rvalue;
+       u8 pre_shift, exp;
        swreg magic;
 
        if (imm > U32_MAX) {
@@ -1506,16 +1506,58 @@ static int wrp_div_imm(struct nfp_prog *nfp_prog, u8 dst, u64 imm)
                return 0;
        }
 
-       rvalue = reciprocal_value(imm);
+       /* NOTE: because we are using "reciprocal_value_adv" which doesn't
+        * support "divisor > (1u << 31)", we need to JIT separate NFP sequence
+        * to handle such case which actually equals to the result of unsigned
+        * comparison "dst >= imm" which could be calculated using the following
+        * NFP sequence:
+        *
+        *  alu[--, dst, -, imm]
+        *  immed[imm, 0]
+        *  alu[dst, imm, +carry, 0]
+        *
+        */
+       if (imm > 1U << 31) {
+               swreg tmp_b = ur_load_imm_any(nfp_prog, imm, imm_b(nfp_prog));
+
+               emit_alu(nfp_prog, reg_none(), dst_a, ALU_OP_SUB, tmp_b);
+               wrp_immed(nfp_prog, imm_a(nfp_prog), 0);
+               emit_alu(nfp_prog, dst_both, imm_a(nfp_prog), ALU_OP_ADD_C,
+                        reg_imm(0));
+               return 0;
+       }
+
+       rvalue = reciprocal_value_adv(imm, 32);
+       exp = rvalue.exp;
+       if (rvalue.is_wide_m && !(imm & 1)) {
+               pre_shift = fls(imm & -imm) - 1;
+               rvalue = reciprocal_value_adv(imm >> pre_shift, 32 - pre_shift);
+       } else {
+               pre_shift = 0;
+       }
        magic = ur_load_imm_any(nfp_prog, rvalue.m, imm_b(nfp_prog));
-       wrp_mul_u32(nfp_prog, imm_both(nfp_prog), reg_none(), dst_a, magic,
-                   true);
-       emit_alu(nfp_prog, dst_both, dst_a, ALU_OP_SUB, tmp_b);
-       emit_shf(nfp_prog, dst_both, reg_none(), SHF_OP_NONE, dst_b,
-                SHF_SC_R_SHF, rvalue.sh1);
-       emit_alu(nfp_prog, dst_both, dst_a, ALU_OP_ADD, tmp_b);
-       emit_shf(nfp_prog, dst_both, reg_none(), SHF_OP_NONE, dst_b,
-                SHF_SC_R_SHF, rvalue.sh2);
+       if (imm == 1U << exp) {
+               emit_shf(nfp_prog, dst_both, reg_none(), SHF_OP_NONE, dst_b,
+                        SHF_SC_R_SHF, exp);
+       } else if (rvalue.is_wide_m) {
+               wrp_mul_u32(nfp_prog, imm_both(nfp_prog), reg_none(), dst_a,
+                           magic, true);
+               emit_alu(nfp_prog, dst_both, dst_a, ALU_OP_SUB,
+                        imm_b(nfp_prog));
+               emit_shf(nfp_prog, dst_both, reg_none(), SHF_OP_NONE, dst_b,
+                        SHF_SC_R_SHF, 1);
+               emit_alu(nfp_prog, dst_both, dst_a, ALU_OP_ADD,
+                        imm_b(nfp_prog));
+               emit_shf(nfp_prog, dst_both, reg_none(), SHF_OP_NONE, dst_b,
+                        SHF_SC_R_SHF, rvalue.sh - 1);
+       } else {
+               if (pre_shift)
+                       emit_shf(nfp_prog, dst_both, reg_none(), SHF_OP_NONE,
+                                dst_b, SHF_SC_R_SHF, pre_shift);
+               wrp_mul_u32(nfp_prog, dst_both, reg_none(), dst_a, magic, true);
+               emit_shf(nfp_prog, dst_both, reg_none(), SHF_OP_NONE,
+                        dst_b, SHF_SC_R_SHF, rvalue.sh);
+       }
 
        return 0;
 }