Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/torvalds/linux.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKumar Kartikeya Dwivedi <memxor@gmail.com>2022-04-25 00:48:49 +0300
committerAlexei Starovoitov <ast@kernel.org>2022-04-26 03:31:35 +0300
commit61df10c7799e27807ad5e459eec9d77cddf8bf45 (patch)
tree348906cc67506a3d63f75ed65ad7f73beaee6f17 /kernel/bpf/syscall.c
parentd9d31cf88702ae071bec033e5c8714048aa71285 (diff)
bpf: Allow storing unreferenced kptr in map
This commit introduces a new pointer type 'kptr' which can be embedded in a map value to hold a PTR_TO_BTF_ID stored by a BPF program during its invocation. When storing such a kptr, the BPF program's PTR_TO_BTF_ID register must have the same type as in the map value's BTF, and loading a kptr marks the destination register as PTR_TO_BTF_ID with the correct kernel BTF and BTF ID. Such kptrs are unreferenced, i.e. by the time another invocation of the BPF program loads this pointer, the object which the pointer points to may no longer exist. Since PTR_TO_BTF_ID loads (using BPF_LDX) are patched to PROBE_MEM loads by the verifier, it would be safe to allow the user to still access such an invalid pointer, but passing such pointers into BPF helpers and kfuncs should not be permitted. A future patch in this series will close this gap. The flexibility offered by allowing programs to dereference such invalid pointers while being safe at runtime frees the verifier from doing complex lifetime tracking. As long as the user can ensure that the object remains valid, it can ensure that the data it reads from the kernel object is valid. The user indicates that a certain pointer must be treated as a kptr capable of accepting stores of PTR_TO_BTF_ID of a certain type, by using a BTF type tag 'kptr' on the pointed-to type of the pointer. Then, this information is recorded in the object BTF which will be passed into the kernel by way of the map's BTF information. The name and kind from the map value BTF are used to look up the in-kernel type, and the actual BTF and BTF ID are recorded in the map struct in a new kptr_off_tab member. For now, only storing pointers to structs is permitted. An example of this specification is shown below: #define __kptr __attribute__((btf_type_tag("kptr"))) struct map_value { ... struct task_struct __kptr *task; ... }; Then, in a BPF program, the user may store a PTR_TO_BTF_ID with the type task_struct into the map, and then load it later.
Note that the destination register is marked PTR_TO_BTF_ID_OR_NULL: as the verifier cannot know statically whether the value is NULL or not, it must treat all potential loads at that map value offset as loading a possibly NULL pointer. Only BPF_LDX, BPF_STX, and BPF_ST (with insn->imm = 0 to denote NULL) are allowed instructions that can access such a pointer. On BPF_LDX, the destination register is updated to be a PTR_TO_BTF_ID, and on BPF_STX, it is checked whether the source register type is a PTR_TO_BTF_ID with the same BTF type as specified in the map BTF. The access size must always be BPF_DW. For the map in map support, the kptr_off_tab for the outer map is copied from the inner map's kptr_off_tab. It was chosen to do a deep copy instead of introducing a refcount to kptr_off_tab, because the copy only needs to be done when parameterizing using inner_map_fd in the map in map case, hence would be unnecessary for all other users. It is not permitted to use the MAP_FREEZE command and mmap for a BPF map having kptrs, similar to the bpf_timer case. A kptr also requires that the BPF program has both read and write access to the map (hence both BPF_F_RDONLY_PROG and BPF_F_WRONLY_PROG are disallowed). Note that check_map_access must be called from both check_helper_mem_access and for the BPF instructions, hence the kptr check must distinguish between ACCESS_DIRECT and ACCESS_HELPER, and reject ACCESS_HELPER cases. We rename stack_access_src to bpf_access_src and reuse it for this purpose. Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com> Signed-off-by: Alexei Starovoitov <ast@kernel.org> Link: https://lore.kernel.org/bpf/20220424214901.2743946-2-memxor@gmail.com
Diffstat (limited to 'kernel/bpf/syscall.c')
-rw-r--r--kernel/bpf/syscall.c103
1 files changed, 100 insertions, 3 deletions
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index e9e3e49c0eb7..575b09339360 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -6,6 +6,7 @@
#include <linux/bpf_trace.h>
#include <linux/bpf_lirc.h>
#include <linux/bpf_verifier.h>
+#include <linux/bsearch.h>
#include <linux/btf.h>
#include <linux/syscalls.h>
#include <linux/slab.h>
@@ -473,12 +474,84 @@ static void bpf_map_release_memcg(struct bpf_map *map)
}
#endif
+static int bpf_map_kptr_off_cmp(const void *a, const void *b)
+{
+ const struct bpf_map_value_off_desc *off_desc1 = a, *off_desc2 = b;
+
+ if (off_desc1->offset < off_desc2->offset)
+ return -1;
+ else if (off_desc1->offset > off_desc2->offset)
+ return 1;
+ return 0;
+}
+
+struct bpf_map_value_off_desc *bpf_map_kptr_off_contains(struct bpf_map *map, u32 offset)
+{
+ /* Since members are iterated in btf_find_field in increasing order,
+ * offsets appended to kptr_off_tab are in increasing order, so we can
+ * do bsearch to find exact match.
+ */
+ struct bpf_map_value_off *tab;
+
+ if (!map_value_has_kptrs(map))
+ return NULL;
+ tab = map->kptr_off_tab;
+ return bsearch(&offset, tab->off, tab->nr_off, sizeof(tab->off[0]), bpf_map_kptr_off_cmp);
+}
+
+void bpf_map_free_kptr_off_tab(struct bpf_map *map)
+{
+ struct bpf_map_value_off *tab = map->kptr_off_tab;
+ int i;
+
+ if (!map_value_has_kptrs(map))
+ return;
+ for (i = 0; i < tab->nr_off; i++)
+ btf_put(tab->off[i].kptr.btf);
+ kfree(tab);
+ map->kptr_off_tab = NULL;
+}
+
+struct bpf_map_value_off *bpf_map_copy_kptr_off_tab(const struct bpf_map *map)
+{
+ struct bpf_map_value_off *tab = map->kptr_off_tab, *new_tab;
+ int size, i;
+
+ if (!map_value_has_kptrs(map))
+ return ERR_PTR(-ENOENT);
+ size = offsetof(struct bpf_map_value_off, off[tab->nr_off]);
+ new_tab = kmemdup(tab, size, GFP_KERNEL | __GFP_NOWARN);
+ if (!new_tab)
+ return ERR_PTR(-ENOMEM);
+ /* Do a deep copy of the kptr_off_tab */
+ for (i = 0; i < tab->nr_off; i++)
+ btf_get(tab->off[i].kptr.btf);
+ return new_tab;
+}
+
+bool bpf_map_equal_kptr_off_tab(const struct bpf_map *map_a, const struct bpf_map *map_b)
+{
+ struct bpf_map_value_off *tab_a = map_a->kptr_off_tab, *tab_b = map_b->kptr_off_tab;
+ bool a_has_kptr = map_value_has_kptrs(map_a), b_has_kptr = map_value_has_kptrs(map_b);
+ int size;
+
+ if (!a_has_kptr && !b_has_kptr)
+ return true;
+ if (a_has_kptr != b_has_kptr)
+ return false;
+ if (tab_a->nr_off != tab_b->nr_off)
+ return false;
+ size = offsetof(struct bpf_map_value_off, off[tab_a->nr_off]);
+ return !memcmp(tab_a, tab_b, size);
+}
+
/* called from workqueue */
static void bpf_map_free_deferred(struct work_struct *work)
{
struct bpf_map *map = container_of(work, struct bpf_map, work);
security_bpf_map_free(map);
+ bpf_map_free_kptr_off_tab(map);
bpf_map_release_memcg(map);
/* implementation dependent freeing */
map->ops->map_free(map);
@@ -640,7 +713,7 @@ static int bpf_map_mmap(struct file *filp, struct vm_area_struct *vma)
int err;
if (!map->ops->map_mmap || map_value_has_spin_lock(map) ||
- map_value_has_timer(map))
+ map_value_has_timer(map) || map_value_has_kptrs(map))
return -ENOTSUPP;
if (!(vma->vm_flags & VM_SHARED))
@@ -820,10 +893,34 @@ static int map_check_btf(struct bpf_map *map, const struct btf *btf,
return -EOPNOTSUPP;
}
- if (map->ops->map_check_btf)
+ map->kptr_off_tab = btf_parse_kptrs(btf, value_type);
+ if (map_value_has_kptrs(map)) {
+ if (!bpf_capable()) {
+ ret = -EPERM;
+ goto free_map_tab;
+ }
+ if (map->map_flags & (BPF_F_RDONLY_PROG | BPF_F_WRONLY_PROG)) {
+ ret = -EACCES;
+ goto free_map_tab;
+ }
+ if (map->map_type != BPF_MAP_TYPE_HASH &&
+ map->map_type != BPF_MAP_TYPE_LRU_HASH &&
+ map->map_type != BPF_MAP_TYPE_ARRAY) {
+ ret = -EOPNOTSUPP;
+ goto free_map_tab;
+ }
+ }
+
+ if (map->ops->map_check_btf) {
ret = map->ops->map_check_btf(map, btf, key_type, value_type);
+ if (ret < 0)
+ goto free_map_tab;
+ }
return ret;
+free_map_tab:
+ bpf_map_free_kptr_off_tab(map);
+ return ret;
}
#define BPF_MAP_CREATE_LAST_FIELD map_extra
@@ -1639,7 +1736,7 @@ static int map_freeze(const union bpf_attr *attr)
return PTR_ERR(map);
if (map->map_type == BPF_MAP_TYPE_STRUCT_OPS ||
- map_value_has_timer(map)) {
+ map_value_has_timer(map) || map_value_has_kptrs(map)) {
fdput(f);
return -ENOTSUPP;
}