net: optimize Berkeley Packet Filter (BPF) processing
authorHagen Paul Pfeifer <hagen@jauu.net>
Sat, 19 Jun 2010 17:05:36 +0000 (17:05 +0000)
committerDavid S. Miller <davem@davemloft.net>
Sat, 26 Jun 2010 04:33:12 +0000 (21:33 -0700)
Gcc is currenlty not in the ability to optimize the switch statement in
sk_run_filter() because of dense case labels. This patch replace the
OR'd labels with ordered sequenced case labels. The sk_chk_filter()
function is modified to patch/replace the original OPCODES in a
ordered but equivalent form. gcc is now in the ability to transform the
switch statement in sk_run_filter into a jump table of complexity O(1).

Until this patch gcc generates a sequence of conditional branches (O(n) of 567
byte .text segment size (arch x86_64):

7ff: 8b 06                 mov    (%rsi),%eax
801: 66 83 f8 35           cmp    $0x35,%ax
805: 0f 84 d0 02 00 00     je     adb <sk_run_filter+0x31d>
80b: 0f 87 07 01 00 00     ja     918 <sk_run_filter+0x15a>
811: 66 83 f8 15           cmp    $0x15,%ax
815: 0f 84 c5 02 00 00     je     ae0 <sk_run_filter+0x322>
81b: 77 73                 ja     890 <sk_run_filter+0xd2>
81d: 66 83 f8 04           cmp    $0x4,%ax
821: 0f 84 17 02 00 00     je     a3e <sk_run_filter+0x280>
827: 77 29                 ja     852 <sk_run_filter+0x94>
829: 66 83 f8 01           cmp    $0x1,%ax
[...]

With the modification the compiler translate the switch statement into
the following jump table fragment:

7ff: 66 83 3e 2c           cmpw   $0x2c,(%rsi)
803: 0f 87 1f 02 00 00     ja     a28 <sk_run_filter+0x26a>
809: 0f b7 06              movzwl (%rsi),%eax
80c: ff 24 c5 00 00 00 00  jmpq   *0x0(,%rax,8)
813: 44 89 e3              mov    %r12d,%ebx
816: e9 43 03 00 00        jmpq   b5e <sk_run_filter+0x3a0>
81b: 41 89 dc              mov    %ebx,%r12d
81e: e9 3b 03 00 00        jmpq   b5e <sk_run_filter+0x3a0>

Furthermore, I reordered the instructions to reduce cache line misses by
order the most common instruction to the start.

Signed-off-by: Hagen Paul Pfeifer <hagen@jauu.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
include/linux/filter.h
net/core/filter.c

index 151f5d703b7e19d158e500d8ebdf688b3d3dd047..69b43dbea6c6ac444dfbf8c0daf1650942a9a9b1 100644 (file)
@@ -91,6 +91,54 @@ struct sock_fprog {  /* Required for SO_ATTACH_FILTER. */
 #define         BPF_TAX         0x00
 #define         BPF_TXA         0x80
 
+enum {
+       BPF_S_RET_K = 0,
+       BPF_S_RET_A,
+       BPF_S_ALU_ADD_K,
+       BPF_S_ALU_ADD_X,
+       BPF_S_ALU_SUB_K,
+       BPF_S_ALU_SUB_X,
+       BPF_S_ALU_MUL_K,
+       BPF_S_ALU_MUL_X,
+       BPF_S_ALU_DIV_X,
+       BPF_S_ALU_AND_K,
+       BPF_S_ALU_AND_X,
+       BPF_S_ALU_OR_K,
+       BPF_S_ALU_OR_X,
+       BPF_S_ALU_LSH_K,
+       BPF_S_ALU_LSH_X,
+       BPF_S_ALU_RSH_K,
+       BPF_S_ALU_RSH_X,
+       BPF_S_ALU_NEG,
+       BPF_S_LD_W_ABS,
+       BPF_S_LD_H_ABS,
+       BPF_S_LD_B_ABS,
+       BPF_S_LD_W_LEN,
+       BPF_S_LD_W_IND,
+       BPF_S_LD_H_IND,
+       BPF_S_LD_B_IND,
+       BPF_S_LD_IMM,
+       BPF_S_LDX_W_LEN,
+       BPF_S_LDX_B_MSH,
+       BPF_S_LDX_IMM,
+       BPF_S_MISC_TAX,
+       BPF_S_MISC_TXA,
+       BPF_S_ALU_DIV_K,
+       BPF_S_LD_MEM,
+       BPF_S_LDX_MEM,
+       BPF_S_ST,
+       BPF_S_STX,
+       BPF_S_JMP_JA,
+       BPF_S_JMP_JEQ_K,
+       BPF_S_JMP_JEQ_X,
+       BPF_S_JMP_JGE_K,
+       BPF_S_JMP_JGE_X,
+       BPF_S_JMP_JGT_K,
+       BPF_S_JMP_JGT_X,
+       BPF_S_JMP_JSET_K,
+       BPF_S_JMP_JSET_X,
+};
+
 #ifndef BPF_MAXINSNS
 #define BPF_MAXINSNS 4096
 #endif
index da69fb728d328f1b47236cebaaea7ae26f3bcd47..52b051f82a016e37c792efc8bc30dca3d71fd537 100644 (file)
@@ -128,87 +128,87 @@ unsigned int sk_run_filter(struct sk_buff *skb, struct sock_filter *filter, int
                fentry = &filter[pc];
 
                switch (fentry->code) {
-               case BPF_ALU|BPF_ADD|BPF_X:
+               case BPF_S_ALU_ADD_X:
                        A += X;
                        continue;
-               case BPF_ALU|BPF_ADD|BPF_K:
+               case BPF_S_ALU_ADD_K:
                        A += fentry->k;
                        continue;
-               case BPF_ALU|BPF_SUB|BPF_X:
+               case BPF_S_ALU_SUB_X:
                        A -= X;
                        continue;
-               case BPF_ALU|BPF_SUB|BPF_K:
+               case BPF_S_ALU_SUB_K:
                        A -= fentry->k;
                        continue;
-               case BPF_ALU|BPF_MUL|BPF_X:
+               case BPF_S_ALU_MUL_X:
                        A *= X;
                        continue;
-               case BPF_ALU|BPF_MUL|BPF_K:
+               case BPF_S_ALU_MUL_K:
                        A *= fentry->k;
                        continue;
-               case BPF_ALU|BPF_DIV|BPF_X:
+               case BPF_S_ALU_DIV_X:
                        if (X == 0)
                                return 0;
                        A /= X;
                        continue;
-               case BPF_ALU|BPF_DIV|BPF_K:
+               case BPF_S_ALU_DIV_K:
                        A /= fentry->k;
                        continue;
-               case BPF_ALU|BPF_AND|BPF_X:
+               case BPF_S_ALU_AND_X:
                        A &= X;
                        continue;
-               case BPF_ALU|BPF_AND|BPF_K:
+               case BPF_S_ALU_AND_K:
                        A &= fentry->k;
                        continue;
-               case BPF_ALU|BPF_OR|BPF_X:
+               case BPF_S_ALU_OR_X:
                        A |= X;
                        continue;
-               case BPF_ALU|BPF_OR|BPF_K:
+               case BPF_S_ALU_OR_K:
                        A |= fentry->k;
                        continue;
-               case BPF_ALU|BPF_LSH|BPF_X:
+               case BPF_S_ALU_LSH_X:
                        A <<= X;
                        continue;
-               case BPF_ALU|BPF_LSH|BPF_K:
+               case BPF_S_ALU_LSH_K:
                        A <<= fentry->k;
                        continue;
-               case BPF_ALU|BPF_RSH|BPF_X:
+               case BPF_S_ALU_RSH_X:
                        A >>= X;
                        continue;
-               case BPF_ALU|BPF_RSH|BPF_K:
+               case BPF_S_ALU_RSH_K:
                        A >>= fentry->k;
                        continue;
-               case BPF_ALU|BPF_NEG:
+               case BPF_S_ALU_NEG:
                        A = -A;
                        continue;
-               case BPF_JMP|BPF_JA:
+               case BPF_S_JMP_JA:
                        pc += fentry->k;
                        continue;
-               case BPF_JMP|BPF_JGT|BPF_K:
+               case BPF_S_JMP_JGT_K:
                        pc += (A > fentry->k) ? fentry->jt : fentry->jf;
                        continue;
-               case BPF_JMP|BPF_JGE|BPF_K:
+               case BPF_S_JMP_JGE_K:
                        pc += (A >= fentry->k) ? fentry->jt : fentry->jf;
                        continue;
-               case BPF_JMP|BPF_JEQ|BPF_K:
+               case BPF_S_JMP_JEQ_K:
                        pc += (A == fentry->k) ? fentry->jt : fentry->jf;
                        continue;
-               case BPF_JMP|BPF_JSET|BPF_K:
+               case BPF_S_JMP_JSET_K:
                        pc += (A & fentry->k) ? fentry->jt : fentry->jf;
                        continue;
-               case BPF_JMP|BPF_JGT|BPF_X:
+               case BPF_S_JMP_JGT_X:
                        pc += (A > X) ? fentry->jt : fentry->jf;
                        continue;
-               case BPF_JMP|BPF_JGE|BPF_X:
+               case BPF_S_JMP_JGE_X:
                        pc += (A >= X) ? fentry->jt : fentry->jf;
                        continue;
-               case BPF_JMP|BPF_JEQ|BPF_X:
+               case BPF_S_JMP_JEQ_X:
                        pc += (A == X) ? fentry->jt : fentry->jf;
                        continue;
-               case BPF_JMP|BPF_JSET|BPF_X:
+               case BPF_S_JMP_JSET_X:
                        pc += (A & X) ? fentry->jt : fentry->jf;
                        continue;
-               case BPF_LD|BPF_W|BPF_ABS:
+               case BPF_S_LD_W_ABS:
                        k = fentry->k;
 load_w:
                        ptr = load_pointer(skb, k, 4, &tmp);
@@ -217,7 +217,7 @@ load_w:
                                continue;
                        }
                        break;
-               case BPF_LD|BPF_H|BPF_ABS:
+               case BPF_S_LD_H_ABS:
                        k = fentry->k;
 load_h:
                        ptr = load_pointer(skb, k, 2, &tmp);
@@ -226,7 +226,7 @@ load_h:
                                continue;
                        }
                        break;
-               case BPF_LD|BPF_B|BPF_ABS:
+               case BPF_S_LD_B_ABS:
                        k = fentry->k;
 load_b:
                        ptr = load_pointer(skb, k, 1, &tmp);
@@ -235,54 +235,54 @@ load_b:
                                continue;
                        }
                        break;
-               case BPF_LD|BPF_W|BPF_LEN:
+               case BPF_S_LD_W_LEN:
                        A = skb->len;
                        continue;
-               case BPF_LDX|BPF_W|BPF_LEN:
+               case BPF_S_LDX_W_LEN:
                        X = skb->len;
                        continue;
-               case BPF_LD|BPF_W|BPF_IND:
+               case BPF_S_LD_W_IND:
                        k = X + fentry->k;
                        goto load_w;
-               case BPF_LD|BPF_H|BPF_IND:
+               case BPF_S_LD_H_IND:
                        k = X + fentry->k;
                        goto load_h;
-               case BPF_LD|BPF_B|BPF_IND:
+               case BPF_S_LD_B_IND:
                        k = X + fentry->k;
                        goto load_b;
-               case BPF_LDX|BPF_B|BPF_MSH:
+               case BPF_S_LDX_B_MSH:
                        ptr = load_pointer(skb, fentry->k, 1, &tmp);
                        if (ptr != NULL) {
                                X = (*(u8 *)ptr & 0xf) << 2;
                                continue;
                        }
                        return 0;
-               case BPF_LD|BPF_IMM:
+               case BPF_S_LD_IMM:
                        A = fentry->k;
                        continue;
-               case BPF_LDX|BPF_IMM:
+               case BPF_S_LDX_IMM:
                        X = fentry->k;
                        continue;
-               case BPF_LD|BPF_MEM:
+               case BPF_S_LD_MEM:
                        A = mem[fentry->k];
                        continue;
-               case BPF_LDX|BPF_MEM:
+               case BPF_S_LDX_MEM:
                        X = mem[fentry->k];
                        continue;
-               case BPF_MISC|BPF_TAX:
+               case BPF_S_MISC_TAX:
                        X = A;
                        continue;
-               case BPF_MISC|BPF_TXA:
+               case BPF_S_MISC_TXA:
                        A = X;
                        continue;
-               case BPF_RET|BPF_K:
+               case BPF_S_RET_K:
                        return fentry->k;
-               case BPF_RET|BPF_A:
+               case BPF_S_RET_A:
                        return A;
-               case BPF_ST:
+               case BPF_S_ST:
                        mem[fentry->k] = A;
                        continue;
-               case BPF_STX:
+               case BPF_S_STX:
                        mem[fentry->k] = X;
                        continue;
                default:
@@ -390,53 +390,128 @@ int sk_chk_filter(struct sock_filter *filter, int flen)
                /* Only allow valid instructions */
                switch (ftest->code) {
                case BPF_ALU|BPF_ADD|BPF_K:
+                       ftest->code = BPF_S_ALU_ADD_K;
+                       break;
                case BPF_ALU|BPF_ADD|BPF_X:
+                       ftest->code = BPF_S_ALU_ADD_X;
+                       break;
                case BPF_ALU|BPF_SUB|BPF_K:
+                       ftest->code = BPF_S_ALU_SUB_K;
+                       break;
                case BPF_ALU|BPF_SUB|BPF_X:
+                       ftest->code = BPF_S_ALU_SUB_X;
+                       break;
                case BPF_ALU|BPF_MUL|BPF_K:
+                       ftest->code = BPF_S_ALU_MUL_K;
+                       break;
                case BPF_ALU|BPF_MUL|BPF_X:
+                       ftest->code = BPF_S_ALU_MUL_X;
+                       break;
                case BPF_ALU|BPF_DIV|BPF_X:
+                       ftest->code = BPF_S_ALU_DIV_X;
+                       break;
                case BPF_ALU|BPF_AND|BPF_K:
+                       ftest->code = BPF_S_ALU_AND_K;
+                       break;
                case BPF_ALU|BPF_AND|BPF_X:
+                       ftest->code = BPF_S_ALU_AND_X;
+                       break;
                case BPF_ALU|BPF_OR|BPF_K:
+                       ftest->code = BPF_S_ALU_OR_K;
+                       break;
                case BPF_ALU|BPF_OR|BPF_X:
+                       ftest->code = BPF_S_ALU_OR_X;
+                       break;
                case BPF_ALU|BPF_LSH|BPF_K:
+                       ftest->code = BPF_S_ALU_LSH_K;
+                       break;
                case BPF_ALU|BPF_LSH|BPF_X:
+                       ftest->code = BPF_S_ALU_LSH_X;
+                       break;
                case BPF_ALU|BPF_RSH|BPF_K:
+                       ftest->code = BPF_S_ALU_RSH_K;
+                       break;
                case BPF_ALU|BPF_RSH|BPF_X:
+                       ftest->code = BPF_S_ALU_RSH_X;
+                       break;
                case BPF_ALU|BPF_NEG:
+                       ftest->code = BPF_S_ALU_NEG;
+                       break;
                case BPF_LD|BPF_W|BPF_ABS:
+                       ftest->code = BPF_S_LD_W_ABS;
+                       break;
                case BPF_LD|BPF_H|BPF_ABS:
+                       ftest->code = BPF_S_LD_H_ABS;
+                       break;
                case BPF_LD|BPF_B|BPF_ABS:
+                       ftest->code = BPF_S_LD_B_ABS;
+                       break;
                case BPF_LD|BPF_W|BPF_LEN:
+                       ftest->code = BPF_S_LD_W_LEN;
+                       break;
                case BPF_LD|BPF_W|BPF_IND:
+                       ftest->code = BPF_S_LD_W_IND;
+                       break;
                case BPF_LD|BPF_H|BPF_IND:
+                       ftest->code = BPF_S_LD_H_IND;
+                       break;
                case BPF_LD|BPF_B|BPF_IND:
+                       ftest->code = BPF_S_LD_B_IND;
+                       break;
                case BPF_LD|BPF_IMM:
+                       ftest->code = BPF_S_LD_IMM;
+                       break;
                case BPF_LDX|BPF_W|BPF_LEN:
+                       ftest->code = BPF_S_LDX_W_LEN;
+                       break;
                case BPF_LDX|BPF_B|BPF_MSH:
+                       ftest->code = BPF_S_LDX_B_MSH;
+                       break;
                case BPF_LDX|BPF_IMM:
+                       ftest->code = BPF_S_LDX_IMM;
+                       break;
                case BPF_MISC|BPF_TAX:
+                       ftest->code = BPF_S_MISC_TAX;
+                       break;
                case BPF_MISC|BPF_TXA:
+                       ftest->code = BPF_S_MISC_TXA;
+                       break;
                case BPF_RET|BPF_K:
+                       ftest->code = BPF_S_RET_K;
+                       break;
                case BPF_RET|BPF_A:
+                       ftest->code = BPF_S_RET_A;
                        break;
 
                /* Some instructions need special checks */
 
-               case BPF_ALU|BPF_DIV|BPF_K:
                        /* check for division by zero */
+               case BPF_ALU|BPF_DIV|BPF_K:
                        if (ftest->k == 0)
                                return -EINVAL;
+                       ftest->code = BPF_S_ALU_DIV_K;
                        break;
 
+               /* check for invalid memory addresses */
                case BPF_LD|BPF_MEM:
+                       if (ftest->k >= BPF_MEMWORDS)
+                               return -EINVAL;
+                       ftest->code = BPF_S_LD_MEM;
+                       break;
                case BPF_LDX|BPF_MEM:
+                       if (ftest->k >= BPF_MEMWORDS)
+                               return -EINVAL;
+                       ftest->code = BPF_S_LDX_MEM;
+                       break;
                case BPF_ST:
+                       if (ftest->k >= BPF_MEMWORDS)
+                               return -EINVAL;
+                       ftest->code = BPF_S_ST;
+                       break;
                case BPF_STX:
-                       /* check for invalid memory addresses */
                        if (ftest->k >= BPF_MEMWORDS)
                                return -EINVAL;
+                       ftest->code = BPF_S_STX;
                        break;
 
                case BPF_JMP|BPF_JA:
@@ -447,28 +522,63 @@ int sk_chk_filter(struct sock_filter *filter, int flen)
                         */
                        if (ftest->k >= (unsigned)(flen-pc-1))
                                return -EINVAL;
+                       ftest->code = BPF_S_JMP_JA;
                        break;
 
                case BPF_JMP|BPF_JEQ|BPF_K:
+                       ftest->code = BPF_S_JMP_JEQ_K;
+                       break;
                case BPF_JMP|BPF_JEQ|BPF_X:
+                       ftest->code = BPF_S_JMP_JEQ_X;
+                       break;
                case BPF_JMP|BPF_JGE|BPF_K:
+                       ftest->code = BPF_S_JMP_JGE_K;
+                       break;
                case BPF_JMP|BPF_JGE|BPF_X:
+                       ftest->code = BPF_S_JMP_JGE_X;
+                       break;
                case BPF_JMP|BPF_JGT|BPF_K:
+                       ftest->code = BPF_S_JMP_JGT_K;
+                       break;
                case BPF_JMP|BPF_JGT|BPF_X:
+                       ftest->code = BPF_S_JMP_JGT_X;
+                       break;
                case BPF_JMP|BPF_JSET|BPF_K:
+                       ftest->code = BPF_S_JMP_JSET_K;
+                       break;
                case BPF_JMP|BPF_JSET|BPF_X:
+                       ftest->code = BPF_S_JMP_JSET_X;
+                       break;
+
+               default:
+                       return -EINVAL;
+               }
+
                        /* for conditionals both must be safe */
+               switch (ftest->code) {
+               case BPF_S_JMP_JEQ_K:
+               case BPF_S_JMP_JEQ_X:
+               case BPF_S_JMP_JGE_K:
+               case BPF_S_JMP_JGE_X:
+               case BPF_S_JMP_JGT_K:
+               case BPF_S_JMP_JGT_X:
+               case BPF_S_JMP_JSET_X:
+               case BPF_S_JMP_JSET_K:
                        if (pc + ftest->jt + 1 >= flen ||
                            pc + ftest->jf + 1 >= flen)
                                return -EINVAL;
-                       break;
+               }
+       }
 
+       /* last instruction must be a RET code */
+       switch (filter[flen - 1].code) {
+       case BPF_S_RET_K:
+       case BPF_S_RET_A:
+               return 0;
+               break;
                default:
                        return -EINVAL;
                }
-       }
-
-       return (BPF_CLASS(filter[flen - 1].code) == BPF_RET) ? 0 : -EINVAL;
 }
 EXPORT_SYMBOL(sk_chk_filter);