sk-filter: Add ability to get socket filter program (v2)
author Pavel Emelyanov <xemul@parallels.com>
Thu, 1 Nov 2012 02:01:48 +0000 (02:01 +0000)
committer David S. Miller <davem@davemloft.net>
Thu, 1 Nov 2012 15:17:15 +0000 (11:17 -0400)
The SO_ATTACH_FILTER option is currently set-only. I propose to add the
ability to get the filter back by using SO_ATTACH_FILTER in getsockopt.
To be easier on the eyes, a SO_GET_FILTER alias for it is declared. This
ability is required by the checkpoint-restore project to be able to
save the full state of a socket.

There are two issues with getting filter back.

First, the kernel modifies sock_filter->code on filter load, thus in
order to return a filter element back to the user we have to decode it
into user-visible constants. Fortunately the modification in question
is interconvertible.

Second, the BPF_S_ALU_DIV_K code modifies the command argument k to
speed up the run-time division by doing kernel_k = reciprocal(user_k).
The bad news is that different user_k values may result in the same
kernel_k, so we can't get the original user_k back. The good news is
that we don't have to. What we need to do is calculate a user2_k such that

  reciprocal(user2_k) == reciprocal(user_k) == kernel_k

i.e. if it is loaded back, the recompiled value will be exactly
the same as it was. That said, user2_k can be calculated like this

  user2_k = reciprocal(kernel_k)

with an exception, that if kernel_k == 0, then user2_k == 1.

The optlen argument is treated like this -- it counts sock_filter
elements, not bytes. When zero, the kernel returns the number of
sock_filter elements in the filter; otherwise it must be at least that
number, i.e. large enough for the whole sock_filter array.

changes since v1:
* Declared SO_GET_FILTER in all arch headers
* Added decode of vlan-tag codes

Signed-off-by: Pavel Emelyanov <xemul@parallels.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
19 files changed:
arch/alpha/include/asm/socket.h
arch/avr32/include/uapi/asm/socket.h
arch/cris/include/asm/socket.h
arch/frv/include/uapi/asm/socket.h
arch/h8300/include/asm/socket.h
arch/ia64/include/uapi/asm/socket.h
arch/m32r/include/asm/socket.h
arch/m68k/include/asm/socket.h
arch/mips/include/uapi/asm/socket.h
arch/mn10300/include/uapi/asm/socket.h
arch/parisc/include/asm/socket.h
arch/powerpc/include/uapi/asm/socket.h
arch/s390/include/uapi/asm/socket.h
arch/sparc/include/uapi/asm/socket.h
arch/xtensa/include/asm/socket.h
include/linux/filter.h
include/uapi/asm-generic/socket.h
net/core/filter.c
net/core/sock.c

index 7d2f75be932e6d16e4ceba058fe0e0c769a8be32..0087d053b77f9ada256b4d58fec451552d48eb63 100644 (file)
@@ -47,6 +47,7 @@
 /* Socket filtering */
 #define SO_ATTACH_FILTER        26
 #define SO_DETACH_FILTER        27
+#define SO_GET_FILTER          SO_ATTACH_FILTER
 
 #define SO_PEERNAME            28
 #define SO_TIMESTAMP           29
index a473f8c6a9aa2be66f04f375ef7b7820b7c0683a..486df68abeecc955757a352cf64e70ad9b39a020 100644 (file)
@@ -40,6 +40,7 @@
 /* Socket filtering */
 #define SO_ATTACH_FILTER        26
 #define SO_DETACH_FILTER        27
+#define SO_GET_FILTER          SO_ATTACH_FILTER
 
 #define SO_PEERNAME            28
 #define SO_TIMESTAMP           29
index ae52825021afd98237557656abf77dad4a75491f..b681b043f6c819fedc4b819c5ffa59a72cd7059f 100644 (file)
@@ -42,6 +42,7 @@
 /* Socket filtering */
 #define SO_ATTACH_FILTER        26
 #define SO_DETACH_FILTER        27
+#define SO_GET_FILTER          SO_ATTACH_FILTER
 
 #define SO_PEERNAME            28
 #define SO_TIMESTAMP           29
index a5b1d7dbb205389eef5e2d5627f0aad3744778fe..871f89b7fbdaf08a0629679f01d804337804cbc7 100644 (file)
@@ -40,6 +40,7 @@
 /* Socket filtering */
 #define SO_ATTACH_FILTER        26
 #define SO_DETACH_FILTER        27
+#define SO_GET_FILTER          SO_ATTACH_FILTER
 
 #define SO_PEERNAME             28
 #define SO_TIMESTAMP           29
index ec4554e7b04b68ab4c41ad93e612469ac896708c..90a2e573c7e679ac92300734e3641f7d5d5b45e0 100644 (file)
@@ -40,6 +40,7 @@
 /* Socket filtering */
 #define SO_ATTACH_FILTER        26
 #define SO_DETACH_FILTER        27
+#define SO_GET_FILTER          SO_ATTACH_FILTER
 
 #define SO_PEERNAME             28
 #define SO_TIMESTAMP           29
index 41fc28a4a18a2305763db7be6e0d51210671030e..23d6759bb57b8dd6af9a824e78ce7335dede1d80 100644 (file)
@@ -49,6 +49,7 @@
 /* Socket filtering */
 #define SO_ATTACH_FILTER       26
 #define SO_DETACH_FILTER       27
+#define SO_GET_FILTER          SO_ATTACH_FILTER
 
 #define SO_PEERNAME            28
 #define SO_TIMESTAMP           29
index a15f40b527834256eababa34dd23d076cf003e2d..5e7088a26726cc0133ae3a6f166f3597832646a6 100644 (file)
@@ -40,6 +40,7 @@
 /* Socket filtering */
 #define SO_ATTACH_FILTER        26
 #define SO_DETACH_FILTER        27
+#define SO_GET_FILTER          SO_ATTACH_FILTER
 
 #define SO_PEERNAME            28
 #define SO_TIMESTAMP           29
index d1be684edf97925331569d9af422a5f427e2a271..285da3b6ad92c440dabaec8dfbeea9b8ccf6613b 100644 (file)
@@ -40,6 +40,7 @@
 /* Socket filtering */
 #define SO_ATTACH_FILTER        26
 #define SO_DETACH_FILTER        27
+#define SO_GET_FILTER          SO_ATTACH_FILTER
 
 #define SO_PEERNAME             28
 #define SO_TIMESTAMP           29
index c5ed59549cb878c211146c8ac86c8eaff354c525..17307ab90474271e4c77f9d48bfb775f2049db74 100644 (file)
@@ -63,6 +63,7 @@ To add: #define SO_REUSEPORT 0x0200   /* Allow local address and port reuse.  */
 /* Socket filtering */
 #define SO_ATTACH_FILTER        26
 #define SO_DETACH_FILTER        27
+#define SO_GET_FILTER          SO_ATTACH_FILTER
 
 #define SO_PEERNAME             28
 #define SO_TIMESTAMP           29
index 820463a484b8dbc5b21ee627822297d4a33bf91f..af5366bbfe62727b09e2d47474e78a7483bb3a13 100644 (file)
@@ -40,6 +40,7 @@
 /* Socket filtering */
 #define SO_ATTACH_FILTER        26
 #define SO_DETACH_FILTER        27
+#define SO_GET_FILTER          SO_ATTACH_FILTER
 
 #define SO_PEERNAME            28
 #define SO_TIMESTAMP           29
index 1b52c2c31a7a284c8e4f6b6d08cc53faa60da42f..d9ff4731253bb6e8f6402686e2401e251126ecb2 100644 (file)
@@ -48,6 +48,7 @@
 /* Socket filtering */
 #define SO_ATTACH_FILTER        0x401a
 #define SO_DETACH_FILTER        0x401b
+#define SO_GET_FILTER          SO_ATTACH_FILTER
 
 #define SO_ACCEPTCONN          0x401c
 
index 3d5179bb122f120e24477c3be6f1459182b8bec5..eb0b1864d400b2a01386ed3a77fedb300a560227 100644 (file)
@@ -47,6 +47,7 @@
 /* Socket filtering */
 #define SO_ATTACH_FILTER       26
 #define SO_DETACH_FILTER       27
+#define SO_GET_FILTER          SO_ATTACH_FILTER
 
 #define SO_PEERNAME            28
 #define SO_TIMESTAMP           29
index 69718cd6d63503e657a4878a1277ab260fa44bf7..436d07c23be8febc9ad245fb73e4f1671a004cd9 100644 (file)
@@ -46,6 +46,7 @@
 /* Socket filtering */
 #define SO_ATTACH_FILTER        26
 #define SO_DETACH_FILTER        27
+#define SO_GET_FILTER          SO_ATTACH_FILTER
 
 #define SO_PEERNAME            28
 #define SO_TIMESTAMP           29
index bea1568ae4af1aeee10008fbf1ae110dc6fa47ae..c83a937ead00676eefef950b6c00d77cd644c8a9 100644 (file)
@@ -41,6 +41,7 @@
 
 #define SO_ATTACH_FILTER       0x001a
 #define SO_DETACH_FILTER        0x001b
+#define SO_GET_FILTER          SO_ATTACH_FILTER
 
 #define SO_PEERNAME            0x001c
 #define SO_TIMESTAMP           0x001d
index e36c681849205cd728bbc59b5903c87f61a81a73..38079be1cf1ebc98fa6d9a61aebd59ff81822f78 100644 (file)
@@ -52,6 +52,7 @@
 
 #define SO_ATTACH_FILTER        26
 #define SO_DETACH_FILTER        27
+#define SO_GET_FILTER          SO_ATTACH_FILTER
 
 #define SO_PEERNAME            28
 #define SO_TIMESTAMP           29
index c9f0005c35e22d6b0e1df0a2b8f5f327ee1daa78..c45eabc135e1f00b7825d017b37df1666acb203e 100644 (file)
@@ -45,6 +45,7 @@ extern void sk_unattached_filter_destroy(struct sk_filter *fp);
 extern int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk);
 extern int sk_detach_filter(struct sock *sk);
 extern int sk_chk_filter(struct sock_filter *filter, unsigned int flen);
+extern int sk_get_filter(struct sock *sk, struct sock_filter __user *filter, unsigned len);
 
 #ifdef CONFIG_BPF_JIT
 extern void bpf_jit_compile(struct sk_filter *fp);
index b1bea03274d5dd0ca36d31e90b361d0dc63225ec..2d32d073a6f9e4275d47c27cbd63b169a3267b9a 100644 (file)
@@ -43,6 +43,7 @@
 /* Socket filtering */
 #define SO_ATTACH_FILTER       26
 #define SO_DETACH_FILTER       27
+#define SO_GET_FILTER          SO_ATTACH_FILTER
 
 #define SO_PEERNAME            28
 #define SO_TIMESTAMP           29
index 5a114d41bf1130fa9424f5b9c2125f4ae5e515b0..c23543cba132bc19a213ed242e47386e8e41ca65 100644 (file)
@@ -760,3 +760,133 @@ int sk_detach_filter(struct sock *sk)
        return ret;
 }
 EXPORT_SYMBOL_GPL(sk_detach_filter);
+/*
+ * sk_decode_filter - convert one kernel-internal filter instruction back
+ *                    to its user-visible form
+ * @filt: instruction as stored in the kernel (only read here)
+ * @to: instruction that is filled in with the decoded result
+ *
+ * The opcode is looked up in the decodes[] table, jt and jf are copied
+ * unchanged, and k is copied unchanged for every opcode except
+ * BPF_S_ALU_DIV_K, where it is recomputed with reciprocal_value().
+ *
+ * Every BPF_S_ANC_* opcode decodes to BPF_LD|BPF_B|BPF_ABS.
+ *
+ * filt->code is used as the table index without a range check.
+ */
+static void sk_decode_filter(struct sock_filter *filt, struct sock_filter *to)
+{
+       static const u16 decodes[] = {
+               [BPF_S_ALU_ADD_K]       = BPF_ALU|BPF_ADD|BPF_K,
+               [BPF_S_ALU_ADD_X]       = BPF_ALU|BPF_ADD|BPF_X,
+               [BPF_S_ALU_SUB_K]       = BPF_ALU|BPF_SUB|BPF_K,
+               [BPF_S_ALU_SUB_X]       = BPF_ALU|BPF_SUB|BPF_X,
+               [BPF_S_ALU_MUL_K]       = BPF_ALU|BPF_MUL|BPF_K,
+               [BPF_S_ALU_MUL_X]       = BPF_ALU|BPF_MUL|BPF_X,
+               [BPF_S_ALU_DIV_X]       = BPF_ALU|BPF_DIV|BPF_X,
+               [BPF_S_ALU_MOD_K]       = BPF_ALU|BPF_MOD|BPF_K,
+               [BPF_S_ALU_MOD_X]       = BPF_ALU|BPF_MOD|BPF_X,
+               [BPF_S_ALU_AND_K]       = BPF_ALU|BPF_AND|BPF_K,
+               [BPF_S_ALU_AND_X]       = BPF_ALU|BPF_AND|BPF_X,
+               [BPF_S_ALU_OR_K]        = BPF_ALU|BPF_OR|BPF_K,
+               [BPF_S_ALU_OR_X]        = BPF_ALU|BPF_OR|BPF_X,
+               [BPF_S_ALU_XOR_K]       = BPF_ALU|BPF_XOR|BPF_K,
+               [BPF_S_ALU_XOR_X]       = BPF_ALU|BPF_XOR|BPF_X,
+               [BPF_S_ALU_LSH_K]       = BPF_ALU|BPF_LSH|BPF_K,
+               [BPF_S_ALU_LSH_X]       = BPF_ALU|BPF_LSH|BPF_X,
+               [BPF_S_ALU_RSH_K]       = BPF_ALU|BPF_RSH|BPF_K,
+               [BPF_S_ALU_RSH_X]       = BPF_ALU|BPF_RSH|BPF_X,
+               [BPF_S_ALU_NEG]         = BPF_ALU|BPF_NEG,
+               [BPF_S_LD_W_ABS]        = BPF_LD|BPF_W|BPF_ABS,
+               [BPF_S_LD_H_ABS]        = BPF_LD|BPF_H|BPF_ABS,
+               [BPF_S_LD_B_ABS]        = BPF_LD|BPF_B|BPF_ABS,
+               [BPF_S_ANC_PROTOCOL]    = BPF_LD|BPF_B|BPF_ABS,
+               [BPF_S_ANC_PKTTYPE]     = BPF_LD|BPF_B|BPF_ABS,
+               [BPF_S_ANC_IFINDEX]     = BPF_LD|BPF_B|BPF_ABS,
+               [BPF_S_ANC_NLATTR]      = BPF_LD|BPF_B|BPF_ABS,
+               [BPF_S_ANC_NLATTR_NEST] = BPF_LD|BPF_B|BPF_ABS,
+               [BPF_S_ANC_MARK]        = BPF_LD|BPF_B|BPF_ABS,
+               [BPF_S_ANC_QUEUE]       = BPF_LD|BPF_B|BPF_ABS,
+               [BPF_S_ANC_HATYPE]      = BPF_LD|BPF_B|BPF_ABS,
+               [BPF_S_ANC_RXHASH]      = BPF_LD|BPF_B|BPF_ABS,
+               [BPF_S_ANC_CPU]         = BPF_LD|BPF_B|BPF_ABS,
+               [BPF_S_ANC_ALU_XOR_X]   = BPF_LD|BPF_B|BPF_ABS,
+               [BPF_S_ANC_SECCOMP_LD_W] = BPF_LD|BPF_B|BPF_ABS,
+               [BPF_S_ANC_VLAN_TAG]    = BPF_LD|BPF_B|BPF_ABS,
+               [BPF_S_ANC_VLAN_TAG_PRESENT] = BPF_LD|BPF_B|BPF_ABS,
+               [BPF_S_LD_W_LEN]        = BPF_LD|BPF_W|BPF_LEN,
+               [BPF_S_LD_W_IND]        = BPF_LD|BPF_W|BPF_IND,
+               [BPF_S_LD_H_IND]        = BPF_LD|BPF_H|BPF_IND,
+               [BPF_S_LD_B_IND]        = BPF_LD|BPF_B|BPF_IND,
+               [BPF_S_LD_IMM]          = BPF_LD|BPF_IMM,
+               [BPF_S_LDX_W_LEN]       = BPF_LDX|BPF_W|BPF_LEN,
+               [BPF_S_LDX_B_MSH]       = BPF_LDX|BPF_B|BPF_MSH,
+               [BPF_S_LDX_IMM]         = BPF_LDX|BPF_IMM,
+               [BPF_S_MISC_TAX]        = BPF_MISC|BPF_TAX,
+               [BPF_S_MISC_TXA]        = BPF_MISC|BPF_TXA,
+               [BPF_S_RET_K]           = BPF_RET|BPF_K,
+               [BPF_S_RET_A]           = BPF_RET|BPF_A,
+               [BPF_S_ALU_DIV_K]       = BPF_ALU|BPF_DIV|BPF_K,
+               [BPF_S_LD_MEM]          = BPF_LD|BPF_MEM,
+               [BPF_S_LDX_MEM]         = BPF_LDX|BPF_MEM,
+               [BPF_S_ST]              = BPF_ST,
+               [BPF_S_STX]             = BPF_STX,
+               [BPF_S_JMP_JA]          = BPF_JMP|BPF_JA,
+               [BPF_S_JMP_JEQ_K]       = BPF_JMP|BPF_JEQ|BPF_K,
+               [BPF_S_JMP_JEQ_X]       = BPF_JMP|BPF_JEQ|BPF_X,
+               [BPF_S_JMP_JGE_K]       = BPF_JMP|BPF_JGE|BPF_K,
+               [BPF_S_JMP_JGE_X]       = BPF_JMP|BPF_JGE|BPF_X,
+               [BPF_S_JMP_JGT_K]       = BPF_JMP|BPF_JGT|BPF_K,
+               [BPF_S_JMP_JGT_X]       = BPF_JMP|BPF_JGT|BPF_X,
+               [BPF_S_JMP_JSET_K]      = BPF_JMP|BPF_JSET|BPF_K,
+               [BPF_S_JMP_JSET_X]      = BPF_JMP|BPF_JSET|BPF_X,
+       };
+       u16 code;
+
+       code = filt->code;
+
+       to->code = decodes[code];
+       to->jt = filt->jt;
+       to->jf = filt->jf;
+
+       if (code == BPF_S_ALU_DIV_K) {
+               /*
+                * When this rule was loaded the user gave us X, which
+                * was translated into R = r(X), where r() stands for
+                * reciprocal_value(). Now we calculate RR = r(R) and
+                * report it back. If that value is loaded next time
+                * and RRR = r(RR) is calculated, then R == RRR will
+                * be true.
+                *
+                * One exception: X == 1 translates into R == 0, and
+                * we can't calculate RR out of it with r(), so 1 is
+                * reported directly.
+                */
+
+               if (filt->k == 0)
+                       to->k = 1;
+               else
+                       to->k = reciprocal_value(filt->k);
+
+               BUG_ON(reciprocal_value(to->k) != filt->k); /* reported k must re-encode to the stored k */
+       } else
+               to->k = filt->k;
+}
+/*
+ * sk_get_filter - copy the filter attached to a socket out to user space
+ * @sk: socket whose sk_filter is read
+ * @ubuf: user buffer receiving the decoded sock_filter instructions
+ * @len: number of sock_filter entries @ubuf can hold; 0 asks only for
+ *       the filter length
+ *
+ * The socket is locked with lock_sock() for the whole operation. Each
+ * instruction is decoded with sk_decode_filter() and copied out one at
+ * a time, so @ubuf may have been partially written when -EFAULT is
+ * returned.
+ *
+ * Returns 0 if no filter is attached, filter->len if @len is 0 or the
+ * whole filter was copied, -EINVAL if @len is smaller than filter->len,
+ * or -EFAULT if copy_to_user() fails.
+ */
+int sk_get_filter(struct sock *sk, struct sock_filter __user *ubuf, unsigned int len)
+{
+       struct sk_filter *filter;
+       int i, ret;
+
+       lock_sock(sk);
+       filter = rcu_dereference_protected(sk->sk_filter,
+                       sock_owned_by_user(sk));
+       ret = 0; /* no filter attached: report zero instructions */
+       if (!filter)
+               goto out;
+       ret = filter->len; /* size query: report the length, copy nothing */
+       if (!len)
+               goto out;
+       ret = -EINVAL; /* caller's buffer has fewer entries than the filter */
+       if (len < filter->len)
+               goto out;
+
+       ret = -EFAULT; /* stays in effect if any copy_to_user() below fails */
+       for (i = 0; i < filter->len; i++) {
+               struct sock_filter fb;
+
+               sk_decode_filter(&filter->insns[i], &fb);
+               if (copy_to_user(&ubuf[i], &fb, sizeof(fb)))
+                       goto out;
+       }
+
+       ret = filter->len;
+out:
+       release_sock(sk);
+       return ret;
+}
index 0a023b8daa554867fb8b2a76335d997aa61f5520..06286006a2cc2be32a472a4453df7d088e1e17b6 100644 (file)
@@ -1077,6 +1077,12 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
        case SO_BINDTODEVICE:
                v.val = sk->sk_bound_dev_if;
                break;
+       case SO_GET_FILTER:
+               len = sk_get_filter(sk, (struct sock_filter __user *)optval, len);
+               if (len < 0)
+                       return len;
+
+               goto lenout;
        default:
                return -ENOPROTOOPT;
        }