#include <linux/if_ether.h>
#include <linux/in.h>
#include <linux/ip.h>
+#include <linux/ipv6.h>
#include <linux/tcp.h>
#include <linux/pkt_cls.h>
#include <linux/types.h>
iph->check = ~((csum & 0xffff) + (csum >> 16));
}
-SEC("encap")
-int encap_f(struct __sk_buff *skb)
+static int encap_ipv4(struct __sk_buff *skb)
{
struct iphdr iph_outer, iph_inner;
struct tcphdr tcph;
- if (skb->protocol != __bpf_constant_htons(ETH_P_IP))
- return TC_ACT_OK;
-
if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_inner,
sizeof(iph_inner)) < 0)
return TC_ACT_OK;
return TC_ACT_OK;
}
-SEC("decap")
-int decap_f(struct __sk_buff *skb)
+/*
+ * Encapsulate TCP segments addressed to cfg_port in an additional IPv6
+ * header (IPv6-in-IPv6).
+ *
+ * Packets that cannot be read or that do not match the filter are passed
+ * through unchanged with TC_ACT_OK. A failure while resizing or rewriting
+ * the packet drops it with TC_ACT_SHOT.
+ */
+static int encap_ipv6(struct __sk_buff *skb)
{
- struct iphdr iph_outer, iph_inner;
+ struct ipv6hdr iph_outer, iph_inner;
+ struct tcphdr tcph;
- if (skb->protocol != __bpf_constant_htons(ETH_P_IP))
+ if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_inner,
+ sizeof(iph_inner)) < 0)
return TC_ACT_OK;
+ /*
+ * The bytes after the fixed header are a TCP header only when
+ * nexthdr says so: skip other protocols and extension headers
+ * instead of matching cfg_port against unrelated data.
+ */
+ if (iph_inner.nexthdr != IPPROTO_TCP)
+ return TC_ACT_OK;
+
- if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_outer,
- sizeof(iph_outer)) < 0)
+ /* filter only packets we want */
+ if (bpf_skb_load_bytes(skb, ETH_HLEN + sizeof(iph_inner),
+ &tcph, sizeof(tcph)) < 0)
return TC_ACT_OK;
- if (iph_outer.ihl != 5 || iph_outer.protocol != IPPROTO_IPIP)
+ if (tcph.dest != __bpf_constant_htons(cfg_port))
+ return TC_ACT_OK;
+
+ /* add room between mac and network header */
+ if (bpf_skb_adjust_room(skb, sizeof(iph_outer), BPF_ADJ_ROOM_NET, 0))
+ return TC_ACT_SHOT;
+
+ /* prepare new outer network header: start from a copy of the inner one */
+ iph_outer = iph_inner;
+ iph_outer.nexthdr = IPPROTO_IPV6;
+ /* the outer payload now also carries the whole inner header */
+ iph_outer.payload_len = bpf_htons(sizeof(iph_outer) +
+ bpf_ntohs(iph_outer.payload_len));
+
+ /* store new outer network header */
+ if (bpf_skb_store_bytes(skb, ETH_HLEN, &iph_outer, sizeof(iph_outer),
+ BPF_F_INVALIDATE_HASH) < 0)
+ return TC_ACT_SHOT;
+
+ /* bpf_skb_adjust_room has moved header to start of room: restore */
+ if (bpf_skb_store_bytes(skb, ETH_HLEN + sizeof(iph_outer),
+ &iph_inner, sizeof(iph_inner),
+ BPF_F_INVALIDATE_HASH) < 0)
+ return TC_ACT_SHOT;
+
+ return TC_ACT_OK;
+}
+/*
+ * Program in section "encap": dispatch on skb->protocol (compared in
+ * network byte order) to the IPv4 or IPv6 encapsulation helper and
+ * return that helper's verdict.
+ */
+SEC("encap")
+int encap_f(struct __sk_buff *skb)
+{
+ switch (skb->protocol) {
+ case __bpf_constant_htons(ETH_P_IP):
+ return encap_ipv4(skb);
+ case __bpf_constant_htons(ETH_P_IPV6):
+ return encap_ipv6(skb);
+ default:
+ /* does not match, ignore: any other protocol passes through unchanged */
return TC_ACT_OK;
+ }
+}
- if (bpf_skb_load_bytes(skb, ETH_HLEN + sizeof(iph_outer),
- &iph_inner, sizeof(iph_inner)) < 0)
+static int decap_internal(struct __sk_buff *skb, int off, int len)
+{
+ char buf[sizeof(struct ipv6hdr)];
+ /*
+ * Strip the len-byte outer header found at offset off. The len bytes
+ * that follow it (the inner header) are saved first, the packet is
+ * shrunk by len, and the saved bytes are written back at off.
+ * buf is sized for struct ipv6hdr, so len must not exceed that.
+ * A failed read leaves the packet alone (TC_ACT_OK); a failure once
+ * resizing is attempted drops it (TC_ACT_SHOT).
+ */
+ if (bpf_skb_load_bytes(skb, off + len, &buf, len) < 0)
return TC_ACT_OK;
- if (bpf_skb_adjust_room(skb, -(int)sizeof(iph_outer),
- BPF_ADJ_ROOM_NET, 0))
+ if (bpf_skb_adjust_room(skb, -len, BPF_ADJ_ROOM_NET, 0))
return TC_ACT_SHOT;
/* bpf_skb_adjust_room has moved outer over inner header: restore */
- if (bpf_skb_store_bytes(skb, ETH_HLEN, &iph_inner, sizeof(iph_inner),
- BPF_F_INVALIDATE_HASH) < 0)
+ if (bpf_skb_store_bytes(skb, off, buf, len, BPF_F_INVALIDATE_HASH) < 0)
return TC_ACT_SHOT;
return TC_ACT_OK;
}
+static int decap_ipv4(struct __sk_buff *skb)
+{
+ struct iphdr iph_outer;
+ /*
+ * Decapsulate IPv4-in-IPv4: read the header that follows the
+ * Ethernet header and strip it only if it is an IPIP header with
+ * ihl == 5. Anything else is passed through with TC_ACT_OK.
+ */
+ if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_outer,
+ sizeof(iph_outer)) < 0)
+ return TC_ACT_OK;
+ /* ihl == 5: no IP options, so the header is exactly sizeof(iph_outer) bytes */
+ if (iph_outer.ihl != 5 || iph_outer.protocol != IPPROTO_IPIP)
+ return TC_ACT_OK;
+ /* remove the outer header; the verdict comes from decap_internal */
+ return decap_internal(skb, ETH_HLEN, sizeof(iph_outer));
+}
+/*
+ * Decapsulate IPv6-in-IPv6: strip the outer IPv6 header when its nexthdr
+ * field is IPPROTO_IPV6. Anything else is passed through with TC_ACT_OK.
+ */
+static int decap_ipv6(struct __sk_buff *skb)
+{
+ struct ipv6hdr iph_outer;
+ /* read the header that follows the Ethernet header */
+ if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_outer,
+ sizeof(iph_outer)) < 0)
+ return TC_ACT_OK;
+ /* only the fixed header is inspected; extension headers are not walked */
+ if (iph_outer.nexthdr != IPPROTO_IPV6)
+ return TC_ACT_OK;
+ /* remove the outer header; the verdict comes from decap_internal */
+ return decap_internal(skb, ETH_HLEN, sizeof(iph_outer));
+}
+/*
+ * Program in section "decap": dispatch on skb->protocol (compared in
+ * network byte order) to the IPv4 or IPv6 decapsulation helper and
+ * return that helper's verdict.
+ */
+SEC("decap")
+int decap_f(struct __sk_buff *skb)
+{
+ switch (skb->protocol) {
+ case __bpf_constant_htons(ETH_P_IP):
+ return decap_ipv4(skb);
+ case __bpf_constant_htons(ETH_P_IPV6):
+ return decap_ipv6(skb);
+ default:
+ /* does not match, ignore: any other protocol passes through unchanged */
+ return TC_ACT_OK;
+ }
+}
+
char __license[] SEC("license") = "GPL";
readonly ns1_v4=192.168.1.1
readonly ns2_v4=192.168.1.2
+readonly ns1_v6=fd::1
+readonly ns2_v6=fd::2
+
setup() {
ip netns add "${ns1}"
ip -netns "${ns1}" -4 addr add "${ns1_v4}/24" dev veth1
ip -netns "${ns2}" -4 addr add "${ns2_v4}/24" dev veth2
+ ip -netns "${ns1}" -6 addr add "${ns1_v6}/64" dev veth1 nodad
+ ip -netns "${ns2}" -6 addr add "${ns2_v6}/64" dev veth2 nodad
sleep 1
}
}
server_listen() {
- ip netns exec "${ns2}" nc -l -p "${port}" &
+ ip netns exec "${ns2}" nc "${netcat_opt}" -l -p "${port}" & # background listener in ns2; netcat_opt (-4 or -6) picks the address family
sleep 0.2
}
client_connect() {
- ip netns exec "${ns1}" nc -z -w 1 "${ns2_v4}" "${port}"
+ ip netns exec "${ns1}" nc "${netcat_opt}" -z -w 1 "${addr2}" "${port}" # connect from ns1 to the ns2 address of the selected family
echo $?
}
set -e
+
+# no arguments: automated test, run every tunnel type in turn
+if [[ "$#" -eq "0" ]]; then
+ # re-execute this script once per IP version; "$0" is quoted so a
+ # script path containing whitespace is not word-split
+ echo "ipip"
+ "$0" ipv4
+
+ echo "ip6ip6"
+ "$0" ipv6
+
+ echo "OK. All tests passed"
+ exit 0
+fi
+# any argument count other than exactly one is a usage error
+if [[ "$#" -ne "1" ]]; then
+ echo "Usage: $0"
+ echo " or: $0 <ipv4|ipv6>"
+ exit 1
+fi
+# pick tunnel device type, endpoint addresses and netcat address-family flag for the requested IP version
+case "$1" in
+"ipv4")
+ readonly tuntype=ipip
+ readonly addr1="${ns1_v4}"
+ readonly addr2="${ns2_v4}"
+ readonly netcat_opt=-4
+ ;;
+"ipv6")
+ readonly tuntype=ip6tnl
+ readonly addr1="${ns1_v6}"
+ readonly addr2="${ns2_v6}"
+ readonly netcat_opt=-6
+ ;;
+*)
+ echo "unknown arg: $1"
+ exit 1
+ ;;
+esac
+
+echo "encap ${addr1} to ${addr2}, type ${tuntype}"
+
trap cleanup EXIT
setup
# serverside, insert decap module
# server is still running
# client can connect again
-ip netns exec "${ns2}" ip link add dev testtun0 type ipip \
- remote "${ns1_v4}" local "${ns2_v4}"
+ip netns exec "${ns2}" ip link add dev testtun0 type "${tuntype}" \
+ remote "${addr1}" local "${addr2}"
ip netns exec "${ns2}" ip link set dev testtun0 up
echo "test bpf encap with tunnel device decap"
client_connect