Skip to content

Commit

Permalink
Add tunnel l2 pcap flag for optional vxlan pcap.
Browse files Browse the repository at this point in the history
If the flag is enabled, packets that appear to be vxlan
encapsulated will have the filtering function applied.
Note: to avoid matching non-vxlan traffic, you will also
want to apply a general pcap filter on the vxlan udp
ports.

In addition, the flag --output-tunnel will result in output
of vxlan header data (i.e. flags/VNI) as well as the inner
address tuple.

Signed-off-by: Tom Hadlaw <tom.hadlaw@isovalent.com>
  • Loading branch information
tommyp1ckles committed Jan 28, 2025
1 parent a6e1e8b commit c1c0dca
Show file tree
Hide file tree
Showing 6 changed files with 191 additions and 21 deletions.
113 changes: 108 additions & 5 deletions bpf/kprobe_pwru.c
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,11 @@ struct tuple {
u8 tcp_flags;
} __attribute__((packed));

// Pair of 6-byte Ethernet MAC addresses, destination first.
// set_tuple() fills this from the h_dest/h_source fields of the inner
// (encapsulated) ethhdr when tunnel output is enabled.
// Packed so that no padding is inserted between or after the fields.
struct l2tuple {
u8 dest[6]; // destination MAC address
u8 src[6]; // source MAC address
} __attribute__((packed));

enum event_type {
EVENT_TYPE_KPROBE = 0,
EVENT_TYPE_KPROBE_MULTI = 1,
Expand All @@ -85,6 +90,8 @@ struct event_t {
u64 print_shinfo_id;
struct skb_meta meta;
struct tuple tuple;
struct tuple tunnel_tuple;
struct l2tuple l2_tuple;
s64 print_stack_id;
u64 param_second;
u64 param_third;
Expand Down Expand Up @@ -146,7 +153,7 @@ struct config {
u8 output_stack: 1;
u8 output_caller: 1;
u8 output_cb: 1;
u8 output_unused: 1;
u8 output_tunnel: 1;
u8 is_set: 1;
u8 track_skb: 1;
u8 track_skb_by_stackid: 1;
Expand Down Expand Up @@ -259,8 +266,55 @@ filter_pcap_l2(struct sk_buff *skb)
return filter_pcap_ebpf_l2((void *)skb, (void *)skb, (void *)skb, data, data_end);
}

// Placeholder for the tunnel L2 pcap filter.
// As written it returns true when data != data_end and all three skb
// pointers are equal; filter_pcap_tunnel_l2() passes the same skb three
// times, so the result there reduces to data != data_end.
// NOTE(review): presumably the user-space loader finds this function by its
// "_tunnel_l2" name suffix and replaces the body with the compiled pcap
// filter, which is why it is __noinline and why every argument is
// referenced -- confirm against internal/libpcap/inject.go.
static __noinline bool
filter_pcap_ebpf_tunnel_l2(void *_skb, void *__skb, void *___skb, void *data, void* data_end)
{
return data != data_end && _skb == __skb && __skb == ___skb;
}

// Placeholder for the tunnel L3 pcap filter.
// As written it returns true when data != data_end and all three skb
// pointers are equal; filter_pcap_tunnel_l2() passes the same skb three
// times, so the result there reduces to data != data_end.
// NOTE(review): presumably the user-space loader finds this function by its
// "_tunnel_l3" name suffix and replaces the body with the compiled pcap
// filter, which is why it is __noinline and why every argument is
// referenced -- confirm against internal/libpcap/inject.go.
static __noinline bool
filter_pcap_ebpf_tunnel_l3(void *_skb, void *__skb, void *___skb, void *data, void* data_end)
{
return data != data_end && _skb == __skb && __skb == ___skb;
}

/*
 * Run the tunnel pcap filters against the inner frame of a packet that looks
 * VXLAN-encapsulated.
 *
 * Returns true when the packet should be kept:
 *   - the outer header's protocol is not UDP (tunnel filter does not apply);
 *   - the 32-bit word right after the outer UDP header is not 0x8, i.e. the
 *     payload does not look like a VXLAN header (tunnel filter does not apply);
 *   - both the tunnel L2 and tunnel L3 filter functions accept the inner frame.
 * Returns false when the inner ethertype is not IPv4 or either tunnel filter
 * rejects the inner frame.
 *
 * NOTE(review): the outer header is always parsed as IPv4; no IP version check
 * is made here before reading ihl/protocol.
 */
static __always_inline bool
filter_pcap_tunnel_l2(struct sk_buff *skb)
{
	/* Fixed header lengths used to step from the outer L4 header to the inner frame. */
	enum { OUTER_UDP_HLEN = 8, VXLAN_HLEN = 8 };

	void *skb_head = BPF_CORE_READ(skb, head);
	u16 l3_off = BPF_CORE_READ(skb, network_header);
	struct iphdr *ip4 = (struct iphdr *)(skb_head + l3_off);
	u16 l4_off = l3_off + BPF_CORE_READ_BITFIELD_PROBED(ip4, ihl) * 4;

	/* For VXLAN, we only care about udp packets. */
	if (BPF_CORE_READ(ip4, protocol) != IPPROTO_UDP)
		return true;

	/*
	 * No VXLAN hdr means don't apply any tunnel filter, but return true to
	 * not filter non vxlan traffic.
	 */
	struct vxlan_metadata *vx =
		(struct vxlan_metadata *)(skb_head + l4_off + OUTER_UDP_HLEN);
	if (BPF_CORE_READ(vx, gbp) != 0x8)
		return true;

	/* Inner Ethernet frame starts right after the VXLAN header. */
	void *inner_l2 = skb_head + l4_off + OUTER_UDP_HLEN + VXLAN_HLEN;
	struct ethhdr *eth = (struct ethhdr *)inner_l2;
	if (BPF_CORE_READ(eth, h_proto) != bpf_htons(ETH_P_IP))
		return false;

	void *data_end = skb_head + BPF_CORE_READ(skb, tail);
	if (!filter_pcap_ebpf_tunnel_l2((void *)skb, (void *)skb, (void *)skb, inner_l2, data_end))
		return false;

	/* Inner IP header follows the inner Ethernet header. */
	void *inner_l3 = inner_l2 + sizeof(struct ethhdr);
	return filter_pcap_ebpf_tunnel_l3((void *)skb, (void *)skb, (void *)skb, inner_l3, data_end);
}

static __always_inline bool
filter_pcap(struct sk_buff *skb) {
if (!filter_pcap_tunnel_l2(skb)) {
return false;
}
if (BPF_CORE_READ(skb, mac_len) == 0)
return filter_pcap_l3(skb);
return filter_pcap_l2(skb);
Expand Down Expand Up @@ -288,7 +342,31 @@ set_meta(struct sk_buff *skb, struct skb_meta *meta) {
}
}

static __always_inline void
/*
 * Read the L3 addressing information of the header located at data + l3_off.
 *
 * Writes the source/destination address, the L4 protocol number and the L3
 * protocol (ETH_P_IP or ETH_P_IPV6) through the output pointers.
 *
 * Returns the offset of the L4 header: l3_off plus ihl * 4 for IPv4, or
 * l3_off plus ipv6_hdrlen() for IPv6.
 */
static __always_inline u16
__set_l3_tuple(void *data, u16 l3_off, bool is_ipv4, union addr *saddr, union addr *daddr, u16 *l3_proto, u8 *l4_proto)
{
	void *l3_hdr = data + l3_off;

	if (!is_ipv4) {
		struct ipv6hdr *ip6 = (struct ipv6hdr *)l3_hdr;

		BPF_CORE_READ_INTO(saddr, ip6, saddr);
		BPF_CORE_READ_INTO(daddr, ip6, daddr);
		BPF_CORE_READ_INTO(l4_proto, ip6, nexthdr);
		*l3_proto = ETH_P_IPV6;
		return l3_off + ipv6_hdrlen(ip6);
	}

	struct iphdr *ip4 = (struct iphdr *)l3_hdr;

	BPF_CORE_READ_INTO(saddr, ip4, saddr);
	BPF_CORE_READ_INTO(daddr, ip4, daddr);
	BPF_CORE_READ_INTO(l4_proto, ip4, protocol);
	*l3_proto = ETH_P_IP;
	return l3_off + BPF_CORE_READ_BITFIELD_PROBED(ip4, ihl) * 4;
}

static __always_inline u16
__set_tuple(struct tuple *tpl, void *data, u16 l3_off, bool is_ipv4) {
u16 l4_off;

Expand All @@ -314,15 +392,18 @@ __set_tuple(struct tuple *tpl, void *data, u16 l3_off, bool is_ipv4) {
tpl->sport= BPF_CORE_READ(tcp, source);
tpl->dport= BPF_CORE_READ(tcp, dest);
bpf_probe_read_kernel(&tpl->tcp_flags, sizeof(tpl->tcp_flags), (void *)tcp + offsetof(struct tcphdr, window) - 1);
return l4_off + sizeof(*tcp);
} else if (tpl->l4_proto == IPPROTO_UDP) {
struct udphdr *udp = (struct udphdr *) (data + l4_off);
tpl->sport= BPF_CORE_READ(udp, source);
tpl->dport= BPF_CORE_READ(udp, dest);
return l4_off + sizeof(*udp);
}
return l4_off;
}

static __always_inline void
set_tuple(struct sk_buff *skb, struct tuple *tpl) {
set_tuple(struct sk_buff *skb, struct tuple *tpl, struct tuple *tunnel_tpl, struct l2tuple *l2_tuple, bool output_tunnel) {
void *skb_head = BPF_CORE_READ(skb, head);
u16 l3_off = BPF_CORE_READ(skb, network_header);

Expand All @@ -333,7 +414,29 @@ set_tuple(struct sk_buff *skb, struct tuple *tpl) {
return;

bool is_ipv4 = ip_vsn == 4;
__set_tuple(tpl, skb_head, l3_off, is_ipv4);
u16 l4_data_off = __set_tuple(tpl, skb_head, l3_off, is_ipv4);

if (!(output_tunnel && tpl->l4_proto == IPPROTO_UDP))
return;

struct vxlan_metadata *vx = (struct vxlan_metadata*) (skb_head + l4_data_off);

// Check if this looks like a vxlan header.
// TODO: Add option for doing similar checks for geneve via flag.
if (BPF_CORE_READ(vx, gbp) != 0x8)
return;

struct ethhdr *inner = (struct ethhdr*) (skb_head + l4_data_off + 2*sizeof(*vx));
BPF_CORE_READ_INTO(&l2_tuple->src, inner, h_source);
BPF_CORE_READ_INTO(&l2_tuple->dest, inner, h_dest);
if (BPF_CORE_READ(inner, h_proto) != bpf_htons(ETH_P_IP))
return;

l3_hdr = (struct iphdr *) (skb_head + l3_off);
ip_vsn = BPF_CORE_READ_BITFIELD_PROBED(l3_hdr, version);
is_ipv4 = ip_vsn == 4;
l3_off = l4_data_off + 2 * sizeof(*vx) + sizeof(struct ethhdr);
__set_tuple(tunnel_tpl, skb_head, l3_off, is_ipv4);
}

static __always_inline u64
Expand Down Expand Up @@ -433,7 +536,7 @@ set_output(void *ctx, struct sk_buff *skb, struct event_t *event) {
}

if (cfg->output_tuple) {
set_tuple(skb, &event->tuple);
set_tuple(skb, &event->tuple, &event->tunnel_tuple, &event->l2_tuple, cfg->output_tunnel);
}

if (cfg->output_skb) {
Expand Down
35 changes: 26 additions & 9 deletions internal/libpcap/inject.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,37 +2,54 @@ package libpcap

import (
"errors"
"fmt"

"github.com/cilium/ebpf"
"github.com/cilium/ebpf/asm"
"github.com/cloudflare/cbpfc"
)

// InjectL2TunnelFilter injects filterExpr into program through injectFilter
// with l3=false and tunnel=true, i.e. targeting the "_tunnel_l2" filter.
//
// NOTE(review): l2TunnelFilterExpr is accepted but never used; only
// filterExpr is injected. Confirm with callers whether the tunnel expression
// was meant to be passed instead.
func InjectL2TunnelFilter(program *ebpf.ProgramSpec, filterExpr, l2TunnelFilterExpr string) (err error) {
	const (
		l3     = false
		tunnel = true
	)
	err = injectFilter(program, filterExpr, l3, tunnel)
	return err
}

func InjectL2Filter(program *ebpf.ProgramSpec, filterExpr string) (err error) {
return injectFilter(program, filterExpr, false)
return injectFilter(program, filterExpr, false, false)
}

func InjectFilters(program *ebpf.ProgramSpec, filterExpr string) (err error) {
if err = injectFilter(program, filterExpr, false); err != nil {
func InjectFilters(program *ebpf.ProgramSpec, filterExpr, tunnelFilterL2Expr, tunnelFilterL3Expr string) (err error) {
if err = injectFilter(program, filterExpr, false, false); err != nil {
return
}
if err = injectFilter(program, filterExpr, true); err != nil {
if err = injectFilter(program, filterExpr, true, false); err != nil {
// This could happen for l2 only filters such as "arp". In this
// case we don't want to exit with an error, but instead inject
// a deny-all filter to reject all l3 skbs.
return injectFilter(program, "__pwru_reject_all__", true)
return injectFilter(program, "__pwru_reject_all__", true, false)
}
// Attach any tunnel filters.
if err := injectFilter(program, tunnelFilterL2Expr, false, true); err != nil {
return fmt.Errorf("l2 tunnel filter: %w", err)
}
if err := injectFilter(program, tunnelFilterL3Expr, true, true); err != nil {
return fmt.Errorf("l3 tunnel filter: %w", err)
}
return
return nil
}

func injectFilter(program *ebpf.ProgramSpec, filterExpr string, l3 bool) (err error) {
func injectFilter(program *ebpf.ProgramSpec, filterExpr string, l3 bool, tunnel bool) (err error) {
if filterExpr == "" {
return
}

suffix := "_l2"
tunnelSuffix := ""
if tunnel {
tunnelSuffix = "_tunnel"
}

suffix := tunnelSuffix + "_l2"
if l3 {
suffix = "_l3"
suffix = tunnelSuffix + "_l3"
}
injectIdx := -1
for idx, inst := range program.Instructions {
Expand Down
4 changes: 4 additions & 0 deletions internal/pwru/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ const (
OutputStackMask
OutputCallerMask
OutputCbMask
OutputTunnelMask
)

const (
Expand Down Expand Up @@ -66,6 +67,9 @@ func GetConfig(flags *Flags) (cfg FilterCfg, err error) {
if flags.OutputTuple {
cfg.OutputFlags |= OutputTupleMask
}
if flags.OutputTunnel {
cfg.OutputFlags |= OutputTunnelMask
}
if flags.OutputStack {
cfg.OutputFlags |= OutputStackMask
}
Expand Down
43 changes: 37 additions & 6 deletions internal/pwru/output.go
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ type jsonPrinter struct {
Len uint32 `json:"len,omitempty"`
Cb [5]uint32 `json:"cb,omitempty"`
Tuple *jsonTuple `json:"tuple,omitempty"`
TunnelTuple *jsonTuple `json:"tunnel_tuple,omitempty"`
Stack interface{} `json:"stack,omitempty"`
SkbMetadata interface{} `json:"skb_metadata,omitempty"`
}
Expand Down Expand Up @@ -155,6 +156,9 @@ func (o *output) PrintHeader() {
if o.flags.OutputCaller {
fmt.Fprintf(o.writer, " %s", "CALLER")
}
if o.flags.OutputTunnel {
fmt.Fprintf(o.writer, " %s", "TUNNEL")
}
fmt.Fprintf(o.writer, "\n")
}

Expand Down Expand Up @@ -209,6 +213,16 @@ func (o *output) PrintJson(event *Event) {
d.Tuple = t
}

if o.flags.OutputTuple {
t := &jsonTuple{}
t.Saddr = addrToStr(event.TunnelTuple.L3Proto, event.TunnelTuple.Saddr)
t.Daddr = addrToStr(event.TunnelTuple.L3Proto, event.TunnelTuple.Daddr)
t.Sport = byteorder.NetworkToHost16(event.TunnelTuple.Sport)
t.Dport = byteorder.NetworkToHost16(event.TunnelTuple.Dport)
t.Proto = event.TunnelTuple.L4Proto
d.TunnelTuple = t
}

if o.flags.OutputStack && event.PrintStackId > 0 {
d.Stack = getStackData(event, o)
}
Expand Down Expand Up @@ -273,20 +287,24 @@ func getAddrByArch(event *Event, o *output) (addr uint64) {
return addr
}

func getTupleData(event *Event, outputTCPFlags bool) (tupleData string) {
func getTuple(tpl Tuple, outputTCPFlags bool) (tupleData string) {
var l4Info string
if event.Tuple.L4Proto == syscall.IPPROTO_TCP && event.Tuple.TCPFlag != 0 && outputTCPFlags {
l4Info = fmt.Sprintf("%s:%s", protoToStr(event.Tuple.L4Proto), event.Tuple.TCPFlag)
if tpl.L4Proto == syscall.IPPROTO_TCP && tpl.TCPFlag != 0 && outputTCPFlags {
l4Info = fmt.Sprintf("%s:%s", protoToStr(tpl.L4Proto), tpl.TCPFlag)
} else {
l4Info = protoToStr(event.Tuple.L4Proto)
l4Info = protoToStr(tpl.L4Proto)
}
tupleData = fmt.Sprintf("%s:%d->%s:%d(%s)",
addrToStr(event.Tuple.L3Proto, event.Tuple.Saddr), byteorder.NetworkToHost16(event.Tuple.Sport),
addrToStr(event.Tuple.L3Proto, event.Tuple.Daddr), byteorder.NetworkToHost16(event.Tuple.Dport),
addrToStr(tpl.L3Proto, tpl.Saddr), byteorder.NetworkToHost16(tpl.Sport),
addrToStr(tpl.L3Proto, tpl.Daddr), byteorder.NetworkToHost16(tpl.Dport),
l4Info)
return tupleData
}

// getTupleData renders event.Tuple (not event.TunnelTuple) as a string by
// delegating to getTuple; outputTCPFlags is forwarded unchanged.
func getTupleData(event *Event, outputTCPFlags bool) (tupleData string) {
	tupleData = getTuple(event.Tuple, outputTCPFlags)
	return tupleData
}

func getStackData(event *Event, o *output) (stackData string) {
var stack StackData
id := uint32(event.PrintStackId)
Expand Down Expand Up @@ -462,9 +480,22 @@ func (o *output) Print(event *Event) {
fmt.Fprintf(o.writer, "%s", getShinfoData(event, o))
}

if o.flags.OutputTunnel {
fmt.Fprintf(o.writer, " "+event.getTunnelL2Data()+" "+getTuple(event.TunnelTuple, o.flags.OutputTCPFlags))
}

fmt.Fprintln(o.writer)
}

// macAddrString formats a 6-byte MAC address as six colon-separated,
// zero-padded, lowercase hexadecimal octets (e.g. "00:1a:2b:3c:4d:ff").
func macAddrString(addr [6]byte) string {
	text := fmt.Sprintf("%02x", addr[0])
	for _, octet := range addr[1:] {
		text += fmt.Sprintf(":%02x", octet)
	}
	return text
}

// getTunnelL2Data renders the event's L2 tuple as "<src mac> -> <dest mac>".
func (e *Event) getTunnelL2Data() string {
	srcMAC := macAddrString(e.L2Tuple.Src)
	dstMAC := macAddrString(e.L2Tuple.Dest)
	return fmt.Sprintf("%s -> %s", srcMAC, dstMAC)
}

func (o *output) getIfaceName(netnsInode, ifindex uint32) string {
if ifaces, ok := o.ifaceCache[uint64(netnsInode)]; ok {
if name, ok := ifaces[ifindex]; ok {
Expand Down
Loading

0 comments on commit c1c0dca

Please sign in to comment.