github.com/torvalds/linux.git
author    Jakub Kicinski <kuba@kernel.org>  2021-08-31 02:42:46 +0300
committer Jakub Kicinski <kuba@kernel.org>  2021-08-31 02:42:47 +0300
commit    19a31d79219cdd66484721f564ee68293dcc6c24 (patch)
tree      be33d5a07761dc01a3dac4c77e9a642625b8f573  /samples/bpf/xdp_redirect_cpu.bpf.c
parent    ca49bfd90a9dde175d2929dc1544b54841e33804 (diff)
parent    fca35b11e18a9d854cda6b18ed39a78011f4b082 (diff)
Merge https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next
Daniel Borkmann says:

====================
bpf-next 2021-08-31

We've added 116 non-merge commits during the last 17 day(s) which contain
a total of 126 files changed, 6813 insertions(+), 4027 deletions(-).

The main changes are:

1) Add opaque bpf_cookie to perf link which the program can read out again,
   to be used in libbpf-based USDT library, from Andrii Nakryiko.

2) Add bpf_task_pt_regs() helper to access userspace pt_regs, from Daniel Xu.

3) Add support for UNIX stream type sockets for BPF sockmap, from Jiang Wang.

4) Allow BPF TCP congestion control progs to call bpf_setsockopt() e.g. to
   switch to another congestion control algorithm during init, from
   Martin KaFai Lau.

5) Extend BPF iterator support for UNIX domain sockets, from Kuniyuki Iwashima.

6) Allow bpf_{set,get}sockopt() calls from setsockopt progs, from Prankur Gupta.

7) Add bpf_get_netns_cookie() helper for BPF_PROG_TYPE_{SOCK_OPS,CGROUP_SOCKOPT}
   progs, from Xu Liu and Stanislav Fomichev.

8) Support for __weak typed ksyms in libbpf, from Hao Luo.

9) Shrink struct cgroup_bpf by 504 bytes through refactoring, from
   Dave Marchevsky.

10) Fix a smatch complaint in verifier's narrow load handling, from
    Andrey Ignatov.

11) Fix BPF interpreter's tail call count limit, from Daniel Borkmann.

12) Big batch of improvements to BPF selftests, from Magnus Karlsson,
    Li Zhijian, Yucong Sun, Yonghong Song, Ilya Leoshkevich, Jussi Maki,
    and others.

13) Another big batch to revamp XDP samples in order to give them consistent
    look and feel, from Kumar Kartikeya Dwivedi.

* https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next: (116 commits)
  MAINTAINERS: Remove self from powerpc BPF JIT
  selftests/bpf: Fix potential unreleased lock
  samples: bpf: Fix uninitialized variable in xdp_redirect_cpu
  selftests/bpf: Reduce more flakyness in sockmap_listen
  bpf: Fix bpf-next builds without CONFIG_BPF_EVENTS
  bpf: selftests: Add dctcp fallback test
  bpf: selftests: Add connect_to_fd_opts to network_helpers
  bpf: selftests: Add sk_state to bpf_tcp_helpers.h
  bpf: tcp: Allow bpf-tcp-cc to call bpf_(get|set)sockopt
  selftests: xsk: Preface options with opt
  selftests: xsk: Make enums lower case
  selftests: xsk: Generate packets from specification
  selftests: xsk: Generate packet directly in umem
  selftests: xsk: Simplify cleanup of ifobjects
  selftests: xsk: Decrease sending speed
  selftests: xsk: Validate tx stats on tx thread
  selftests: xsk: Simplify packet validation in xsk tests
  selftests: xsk: Rename worker_* functions that are not thread entry points
  selftests: xsk: Disassociate umem size with packets sent
  selftests: xsk: Remove end-of-test packet
  ...
====================

Link: https://lore.kernel.org/r/20210830225618.11634-1-daniel@iogearbox.net
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Diffstat (limited to 'samples/bpf/xdp_redirect_cpu.bpf.c')
 -rw-r--r--  samples/bpf/xdp_redirect_cpu.bpf.c | 541
 1 file changed, 541 insertions(+), 0 deletions(-)
diff --git a/samples/bpf/xdp_redirect_cpu.bpf.c b/samples/bpf/xdp_redirect_cpu.bpf.c
new file mode 100644
index 000000000000..f10fe3cf25f6
--- /dev/null
+++ b/samples/bpf/xdp_redirect_cpu.bpf.c
@@ -0,0 +1,541 @@
+/* XDP redirect to CPUs via cpumap (BPF_MAP_TYPE_CPUMAP)
+ *
+ * GPLv2, Copyright(c) 2017 Jesper Dangaard Brouer, Red Hat, Inc.
+ */
+#include "vmlinux.h"
+#include "xdp_sample.bpf.h"
+#include "xdp_sample_shared.h"
+#include "hash_func01.h"
+
+/* Special map type that can XDP_REDIRECT frames to another CPU */
+struct {
+ __uint(type, BPF_MAP_TYPE_CPUMAP);
+ __uint(key_size, sizeof(u32));
+ __uint(value_size, sizeof(struct bpf_cpumap_val));
+} cpu_map SEC(".maps");
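
For context: entries in a BPF_MAP_TYPE_CPUMAP are installed from user space with bpf_map_update_elem(); the value is a struct bpf_cpumap_val carrying the destination CPU's queue size and, optionally, the fd of a second-level program to run on that CPU. A minimal sketch, assuming an already-obtained map fd and an illustrative queue size (neither is part of this sample):

/* Hedged user-space sketch: install CPU 2 into cpu_map with a
 * 2048-slot queue and no second-level program attached.
 */
#include <bpf/bpf.h>        /* bpf_map_update_elem() */
#include <linux/bpf.h>      /* struct bpf_cpumap_val, BPF_ANY */

static int add_cpu_to_cpumap(int cpu_map_fd)    /* fd: assumption */
{
	struct bpf_cpumap_val val = {
		.qsize = 2048,      /* illustrative queue size */
		.bpf_prog.fd = 0,   /* 0 = no second-level prog */
	};
	__u32 key = 2;              /* destination CPU id */

	return bpf_map_update_elem(cpu_map_fd, &key, &val, BPF_ANY);
}
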
+
+/* Set of maps controlling which CPUs are available, and for iterating
+ * through the selectable redirect CPUs.
+ */
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, u32);
+ __type(value, u32);
+} cpus_available SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, u32);
+ __type(value, u32);
+ __uint(max_entries, 1);
+} cpus_count SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+ __type(key, u32);
+ __type(value, u32);
+ __uint(max_entries, 1);
+} cpus_iterator SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_DEVMAP);
+ __uint(key_size, sizeof(int));
+ __uint(value_size, sizeof(struct bpf_devmap_val));
+ __uint(max_entries, 1);
+} tx_port SEC(".maps");
+
+char tx_mac_addr[ETH_ALEN];
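
tx_mac_addr is a plain BPF global, so the loader can fill it through the generated skeleton's data section before attaching. A hedged sketch, assuming a skeleton generated from this file (xdp_redirect_cpu.skel.h) that exposes the array via .bss; the helper name is hypothetical:

/* Hedged sketch: copy the egress device's MAC into the BPF-side
 * global before attach (skeleton name and field layout assumed).
 */
#include <string.h>
#include "xdp_redirect_cpu.skel.h"   /* assumed skeleton name */

static void set_tx_mac(struct xdp_redirect_cpu *skel,
		       const unsigned char mac[6])
{
	memcpy(skel->bss->tx_mac_addr, mac, 6);   /* ETH_ALEN == 6 */
}
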
+
+/* Helper parse functions */
+
+static __always_inline
+bool parse_eth(struct ethhdr *eth, void *data_end,
+ u16 *eth_proto, u64 *l3_offset)
+{
+ u16 eth_type;
+ u64 offset;
+
+ offset = sizeof(*eth);
+ if ((void *)eth + offset > data_end)
+ return false;
+
+ eth_type = eth->h_proto;
+
+	/* Skip non-802.3 Ethertypes */
+ if (__builtin_expect(bpf_ntohs(eth_type) < ETH_P_802_3_MIN, 0))
+ return false;
+
+ /* Handle VLAN tagged packet */
+ if (eth_type == bpf_htons(ETH_P_8021Q) ||
+ eth_type == bpf_htons(ETH_P_8021AD)) {
+ struct vlan_hdr *vlan_hdr;
+
+ vlan_hdr = (void *)eth + offset;
+ offset += sizeof(*vlan_hdr);
+ if ((void *)eth + offset > data_end)
+ return false;
+ eth_type = vlan_hdr->h_vlan_encapsulated_proto;
+ }
+ /* Handle double VLAN tagged packet */
+ if (eth_type == bpf_htons(ETH_P_8021Q) ||
+ eth_type == bpf_htons(ETH_P_8021AD)) {
+ struct vlan_hdr *vlan_hdr;
+
+ vlan_hdr = (void *)eth + offset;
+ offset += sizeof(*vlan_hdr);
+ if ((void *)eth + offset > data_end)
+ return false;
+ eth_type = vlan_hdr->h_vlan_encapsulated_proto;
+ }
+
+ *eth_proto = bpf_ntohs(eth_type);
+ *l3_offset = offset;
+ return true;
+}
+
+static __always_inline
+u16 get_dest_port_ipv4_udp(struct xdp_md *ctx, u64 nh_off)
+{
+ void *data_end = (void *)(long)ctx->data_end;
+ void *data = (void *)(long)ctx->data;
+ struct iphdr *iph = data + nh_off;
+ struct udphdr *udph;
+ u16 dport;
+
+ if (iph + 1 > data_end)
+ return 0;
+	if (iph->protocol != IPPROTO_UDP)
+ return 0;
+
+ udph = (void *)(iph + 1);
+ if (udph + 1 > data_end)
+ return 0;
+
+ dport = bpf_ntohs(udph->dest);
+ return dport;
+}
+
+static __always_inline
+int get_proto_ipv4(struct xdp_md *ctx, u64 nh_off)
+{
+ void *data_end = (void *)(long)ctx->data_end;
+ void *data = (void *)(long)ctx->data;
+ struct iphdr *iph = data + nh_off;
+
+ if (iph + 1 > data_end)
+ return 0;
+ return iph->protocol;
+}
+
+static __always_inline
+int get_proto_ipv6(struct xdp_md *ctx, u64 nh_off)
+{
+ void *data_end = (void *)(long)ctx->data_end;
+ void *data = (void *)(long)ctx->data;
+ struct ipv6hdr *ip6h = data + nh_off;
+
+ if (ip6h + 1 > data_end)
+ return 0;
+ return ip6h->nexthdr;
+}
+
+SEC("xdp")
+int xdp_prognum0_no_touch(struct xdp_md *ctx)
+{
+ u32 key = bpf_get_smp_processor_id();
+ struct datarec *rec;
+ u32 *cpu_selected;
+ u32 cpu_dest = 0;
+ u32 key0 = 0;
+
+ /* Only use first entry in cpus_available */
+ cpu_selected = bpf_map_lookup_elem(&cpus_available, &key0);
+ if (!cpu_selected)
+ return XDP_ABORTED;
+ cpu_dest = *cpu_selected;
+
+ rec = bpf_map_lookup_elem(&rx_cnt, &key);
+ if (!rec)
+ return XDP_PASS;
+ NO_TEAR_INC(rec->processed);
+
+ if (cpu_dest >= nr_cpus) {
+ NO_TEAR_INC(rec->issue);
+ return XDP_ABORTED;
+ }
+ return bpf_redirect_map(&cpu_map, cpu_dest, 0);
+}
+
+SEC("xdp")
+int xdp_prognum1_touch_data(struct xdp_md *ctx)
+{
+ void *data_end = (void *)(long)ctx->data_end;
+ void *data = (void *)(long)ctx->data;
+ u32 key = bpf_get_smp_processor_id();
+ struct ethhdr *eth = data;
+ struct datarec *rec;
+ u32 *cpu_selected;
+ u32 cpu_dest = 0;
+ u32 key0 = 0;
+ u16 eth_type;
+
+ /* Only use first entry in cpus_available */
+ cpu_selected = bpf_map_lookup_elem(&cpus_available, &key0);
+ if (!cpu_selected)
+ return XDP_ABORTED;
+ cpu_dest = *cpu_selected;
+
+	/* Validate that the packet covers at least a full Eth header */
+ if (eth + 1 > data_end)
+ return XDP_ABORTED;
+
+ rec = bpf_map_lookup_elem(&rx_cnt, &key);
+ if (!rec)
+ return XDP_PASS;
+ NO_TEAR_INC(rec->processed);
+
+	/* Read packet data, and use it (drop non-802.3 Ethertypes) */
+ eth_type = eth->h_proto;
+ if (bpf_ntohs(eth_type) < ETH_P_802_3_MIN) {
+ NO_TEAR_INC(rec->dropped);
+ return XDP_DROP;
+ }
+
+ if (cpu_dest >= nr_cpus) {
+ NO_TEAR_INC(rec->issue);
+ return XDP_ABORTED;
+ }
+ return bpf_redirect_map(&cpu_map, cpu_dest, 0);
+}
+
+SEC("xdp")
+int xdp_prognum2_round_robin(struct xdp_md *ctx)
+{
+ void *data_end = (void *)(long)ctx->data_end;
+ void *data = (void *)(long)ctx->data;
+ u32 key = bpf_get_smp_processor_id();
+ struct datarec *rec;
+ u32 cpu_dest = 0;
+ u32 key0 = 0;
+
+ u32 *cpu_selected;
+ u32 *cpu_iterator;
+ u32 *cpu_max;
+ u32 cpu_idx;
+
+ cpu_max = bpf_map_lookup_elem(&cpus_count, &key0);
+ if (!cpu_max)
+ return XDP_ABORTED;
+
+ cpu_iterator = bpf_map_lookup_elem(&cpus_iterator, &key0);
+ if (!cpu_iterator)
+ return XDP_ABORTED;
+ cpu_idx = *cpu_iterator;
+
+ *cpu_iterator += 1;
+ if (*cpu_iterator == *cpu_max)
+ *cpu_iterator = 0;
+
+ cpu_selected = bpf_map_lookup_elem(&cpus_available, &cpu_idx);
+ if (!cpu_selected)
+ return XDP_ABORTED;
+ cpu_dest = *cpu_selected;
+
+ rec = bpf_map_lookup_elem(&rx_cnt, &key);
+ if (!rec)
+ return XDP_PASS;
+ NO_TEAR_INC(rec->processed);
+
+ if (cpu_dest >= nr_cpus) {
+ NO_TEAR_INC(rec->issue);
+ return XDP_ABORTED;
+ }
+ return bpf_redirect_map(&cpu_map, cpu_dest, 0);
+}
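
The round-robin program above walks cpus_available[0 .. *cpu_max), so user space must populate both maps before traffic arrives. A minimal sketch with an illustrative CPU set; the map fds are assumptions obtained from the loader:

/* Hedged user-space sketch: enable CPUs 1 and 3 for round-robin
 * redirect and publish the count. Error paths abbreviated.
 */
#include <bpf/bpf.h>

static int setup_round_robin(int avail_fd, int count_fd)
{
	__u32 cpus[] = { 1, 3 };   /* illustrative CPU set */
	__u32 n = 2, key0 = 0;

	for (__u32 i = 0; i < n; i++)
		if (bpf_map_update_elem(avail_fd, &i, &cpus[i], BPF_ANY))
			return -1;
	return bpf_map_update_elem(count_fd, &key0, &n, BPF_ANY);
}
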
+
+SEC("xdp")
+int xdp_prognum3_proto_separate(struct xdp_md *ctx)
+{
+ void *data_end = (void *)(long)ctx->data_end;
+ void *data = (void *)(long)ctx->data;
+ u32 key = bpf_get_smp_processor_id();
+ struct ethhdr *eth = data;
+ u8 ip_proto = IPPROTO_UDP;
+ struct datarec *rec;
+ u16 eth_proto = 0;
+ u64 l3_offset = 0;
+ u32 cpu_dest = 0;
+ u32 *cpu_lookup;
+ u32 cpu_idx = 0;
+
+ rec = bpf_map_lookup_elem(&rx_cnt, &key);
+ if (!rec)
+ return XDP_PASS;
+ NO_TEAR_INC(rec->processed);
+
+	if (!parse_eth(eth, data_end, &eth_proto, &l3_offset))
+ return XDP_PASS; /* Just skip */
+
+ /* Extract L4 protocol */
+ switch (eth_proto) {
+ case ETH_P_IP:
+ ip_proto = get_proto_ipv4(ctx, l3_offset);
+ break;
+ case ETH_P_IPV6:
+ ip_proto = get_proto_ipv6(ctx, l3_offset);
+ break;
+ case ETH_P_ARP:
+ cpu_idx = 0; /* ARP packet handled on separate CPU */
+ break;
+ default:
+ cpu_idx = 0;
+ }
+
+ /* Choose CPU based on L4 protocol */
+ switch (ip_proto) {
+ case IPPROTO_ICMP:
+ case IPPROTO_ICMPV6:
+ cpu_idx = 2;
+ break;
+ case IPPROTO_TCP:
+ cpu_idx = 0;
+ break;
+ case IPPROTO_UDP:
+ cpu_idx = 1;
+ break;
+ default:
+ cpu_idx = 0;
+ }
+
+ cpu_lookup = bpf_map_lookup_elem(&cpus_available, &cpu_idx);
+ if (!cpu_lookup)
+ return XDP_ABORTED;
+ cpu_dest = *cpu_lookup;
+
+ if (cpu_dest >= nr_cpus) {
+ NO_TEAR_INC(rec->issue);
+ return XDP_ABORTED;
+ }
+ return bpf_redirect_map(&cpu_map, cpu_dest, 0);
+}
+
+SEC("xdp")
+int xdp_prognum4_ddos_filter_pktgen(struct xdp_md *ctx)
+{
+ void *data_end = (void *)(long)ctx->data_end;
+ void *data = (void *)(long)ctx->data;
+ u32 key = bpf_get_smp_processor_id();
+ struct ethhdr *eth = data;
+ u8 ip_proto = IPPROTO_UDP;
+ struct datarec *rec;
+ u16 eth_proto = 0;
+ u64 l3_offset = 0;
+ u32 cpu_dest = 0;
+ u32 *cpu_lookup;
+ u32 cpu_idx = 0;
+ u16 dest_port;
+
+ rec = bpf_map_lookup_elem(&rx_cnt, &key);
+ if (!rec)
+ return XDP_PASS;
+ NO_TEAR_INC(rec->processed);
+
+	if (!parse_eth(eth, data_end, &eth_proto, &l3_offset))
+ return XDP_PASS; /* Just skip */
+
+ /* Extract L4 protocol */
+ switch (eth_proto) {
+ case ETH_P_IP:
+ ip_proto = get_proto_ipv4(ctx, l3_offset);
+ break;
+ case ETH_P_IPV6:
+ ip_proto = get_proto_ipv6(ctx, l3_offset);
+ break;
+ case ETH_P_ARP:
+ cpu_idx = 0; /* ARP packet handled on separate CPU */
+ break;
+ default:
+ cpu_idx = 0;
+ }
+
+ /* Choose CPU based on L4 protocol */
+ switch (ip_proto) {
+ case IPPROTO_ICMP:
+ case IPPROTO_ICMPV6:
+ cpu_idx = 2;
+ break;
+ case IPPROTO_TCP:
+ cpu_idx = 0;
+ break;
+ case IPPROTO_UDP:
+ cpu_idx = 1;
+ /* DDoS filter UDP port 9 (pktgen) */
+ dest_port = get_dest_port_ipv4_udp(ctx, l3_offset);
+ if (dest_port == 9) {
+ NO_TEAR_INC(rec->dropped);
+ return XDP_DROP;
+ }
+ break;
+ default:
+ cpu_idx = 0;
+ }
+
+ cpu_lookup = bpf_map_lookup_elem(&cpus_available, &cpu_idx);
+ if (!cpu_lookup)
+ return XDP_ABORTED;
+ cpu_dest = *cpu_lookup;
+
+ if (cpu_dest >= nr_cpus) {
+ NO_TEAR_INC(rec->issue);
+ return XDP_ABORTED;
+ }
+ return bpf_redirect_map(&cpu_map, cpu_dest, 0);
+}
+
+/* Hashing initval */
+#define INITVAL 15485863
+
+static __always_inline
+u32 get_ipv4_hash_ip_pair(struct xdp_md *ctx, u64 nh_off)
+{
+ void *data_end = (void *)(long)ctx->data_end;
+ void *data = (void *)(long)ctx->data;
+ struct iphdr *iph = data + nh_off;
+ u32 cpu_hash;
+
+ if (iph + 1 > data_end)
+ return 0;
+
+ cpu_hash = iph->saddr + iph->daddr;
+ cpu_hash = SuperFastHash((char *)&cpu_hash, 4, INITVAL + iph->protocol);
+
+ return cpu_hash;
+}
+
+static __always_inline
+u32 get_ipv6_hash_ip_pair(struct xdp_md *ctx, u64 nh_off)
+{
+ void *data_end = (void *)(long)ctx->data_end;
+ void *data = (void *)(long)ctx->data;
+ struct ipv6hdr *ip6h = data + nh_off;
+ u32 cpu_hash;
+
+ if (ip6h + 1 > data_end)
+ return 0;
+
+ cpu_hash = ip6h->saddr.in6_u.u6_addr32[0] + ip6h->daddr.in6_u.u6_addr32[0];
+ cpu_hash += ip6h->saddr.in6_u.u6_addr32[1] + ip6h->daddr.in6_u.u6_addr32[1];
+ cpu_hash += ip6h->saddr.in6_u.u6_addr32[2] + ip6h->daddr.in6_u.u6_addr32[2];
+ cpu_hash += ip6h->saddr.in6_u.u6_addr32[3] + ip6h->daddr.in6_u.u6_addr32[3];
+ cpu_hash = SuperFastHash((char *)&cpu_hash, 4, INITVAL + ip6h->nexthdr);
+
+ return cpu_hash;
+}
+
+/* Load-Balance traffic based on hashing the IP-address pair plus the
+ * L4 protocol. The hashing scheme is symmetric: the saddr + daddr sum
+ * is commutative, so swapping IP src/dest still hits the same CPU.
+ */
+SEC("xdp")
+int xdp_prognum5_lb_hash_ip_pairs(struct xdp_md *ctx)
+{
+ void *data_end = (void *)(long)ctx->data_end;
+ void *data = (void *)(long)ctx->data;
+ u32 key = bpf_get_smp_processor_id();
+ struct ethhdr *eth = data;
+ struct datarec *rec;
+ u16 eth_proto = 0;
+ u64 l3_offset = 0;
+ u32 cpu_dest = 0;
+ u32 cpu_idx = 0;
+ u32 *cpu_lookup;
+ u32 key0 = 0;
+ u32 *cpu_max;
+ u32 cpu_hash;
+
+ rec = bpf_map_lookup_elem(&rx_cnt, &key);
+ if (!rec)
+ return XDP_PASS;
+ NO_TEAR_INC(rec->processed);
+
+ cpu_max = bpf_map_lookup_elem(&cpus_count, &key0);
+ if (!cpu_max)
+ return XDP_ABORTED;
+
+	if (!parse_eth(eth, data_end, &eth_proto, &l3_offset))
+ return XDP_PASS; /* Just skip */
+
+ /* Hash for IPv4 and IPv6 */
+ switch (eth_proto) {
+ case ETH_P_IP:
+ cpu_hash = get_ipv4_hash_ip_pair(ctx, l3_offset);
+ break;
+ case ETH_P_IPV6:
+ cpu_hash = get_ipv6_hash_ip_pair(ctx, l3_offset);
+ break;
+ case ETH_P_ARP: /* ARP packet handled on CPU idx 0 */
+ default:
+ cpu_hash = 0;
+ }
+
+ /* Choose CPU based on hash */
+ cpu_idx = cpu_hash % *cpu_max;
+
+ cpu_lookup = bpf_map_lookup_elem(&cpus_available, &cpu_idx);
+ if (!cpu_lookup)
+ return XDP_ABORTED;
+ cpu_dest = *cpu_lookup;
+
+ if (cpu_dest >= nr_cpus) {
+ NO_TEAR_INC(rec->issue);
+ return XDP_ABORTED;
+ }
+ return bpf_redirect_map(&cpu_map, cpu_dest, 0);
+}
+
+SEC("xdp_cpumap/redirect")
+int xdp_redirect_cpu_devmap(struct xdp_md *ctx)
+{
+ void *data_end = (void *)(long)ctx->data_end;
+ void *data = (void *)(long)ctx->data;
+ struct ethhdr *eth = data;
+ u64 nh_off;
+
+ nh_off = sizeof(*eth);
+ if (data + nh_off > data_end)
+ return XDP_DROP;
+
+ swap_src_dst_mac(data);
+ return bpf_redirect_map(&tx_port, 0, 0);
+}
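
The redirect above targets tx_port slot 0, which user space must first point at an egress ifindex, optionally attaching the devmap egress program defined below via its fd. A minimal sketch; the fd and ifindex values are assumptions:

/* Hedged user-space sketch: wire tx_port[0] to the egress device and
 * hook the devmap egress program (pass egress_prog_fd = 0 for none).
 */
#include <bpf/bpf.h>

static int set_tx_port(int tx_port_fd, int egress_ifindex,
		       int egress_prog_fd)
{
	struct bpf_devmap_val val = {
		.ifindex = egress_ifindex,
		.bpf_prog.fd = egress_prog_fd,
	};
	int key = 0;   /* tx_port has max_entries = 1 */

	return bpf_map_update_elem(tx_port_fd, &key, &val, BPF_ANY);
}
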
+
+SEC("xdp_cpumap/pass")
+int xdp_redirect_cpu_pass(struct xdp_md *ctx)
+{
+ return XDP_PASS;
+}
+
+SEC("xdp_cpumap/drop")
+int xdp_redirect_cpu_drop(struct xdp_md *ctx)
+{
+ return XDP_DROP;
+}
+
+SEC("xdp_devmap/egress")
+int xdp_redirect_egress_prog(struct xdp_md *ctx)
+{
+ void *data_end = (void *)(long)ctx->data_end;
+ void *data = (void *)(long)ctx->data;
+ struct ethhdr *eth = data;
+ u64 nh_off;
+
+ nh_off = sizeof(*eth);
+ if (data + nh_off > data_end)
+ return XDP_DROP;
+
+ __builtin_memcpy(eth->h_source, (const char *)tx_mac_addr, ETH_ALEN);
+
+ return XDP_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
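
Since cpu_map and cpus_available are declared above without max_entries, the matching loader has to size them before load. A minimal end-to-end sketch using current libbpf; the skeleton names and flow are assumptions modeled on this file's name, not the sample's actual user-space code:

/* Hedged sketch: size the CPU maps to the number of possible CPUs,
 * load, then attach one scheduling program via XDP.
 */
#include <bpf/libbpf.h>
#include "xdp_redirect_cpu.skel.h"   /* assumed skeleton name */

int attach_no_touch(int ifindex, unsigned int n_cpus)
{
	struct xdp_redirect_cpu *skel = xdp_redirect_cpu__open();

	if (!skel)
		return -1;
	/* Give the unsized maps one slot per possible CPU before load */
	bpf_map__set_max_entries(skel->maps.cpu_map, n_cpus);
	bpf_map__set_max_entries(skel->maps.cpus_available, n_cpus);
	if (xdp_redirect_cpu__load(skel))
		return -1;
	return bpf_xdp_attach(ifindex,
			      bpf_program__fd(skel->progs.xdp_prognum0_no_touch),
			      0, NULL);
}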