Welcome to mirror list, hosted at ThFree Co, Russian Federation.

bpfmap.c « criu - github.com/checkpoint-restore/criu.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
blob: 25098368d35cab40d0bbbf2a2260f886a61d0879 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
#include <stdio.h>
#include <bpf/bpf.h>

#include "common/compiler.h"
#include "imgset.h"
#include "bpfmap.h"
#include "fdinfo.h"
#include "image.h"
#include "util.h"
#include "log.h"

#include "protobuf.h"

#ifndef LIBBPF_OPTS
#define LIBBPF_OPTS   DECLARE_LIBBPF_OPTS
#define LEGACY_LIBBPF /* Using libbpf < 0.7 */
#endif

int is_bpfmap_link(char *link)
{
	return is_anon_link_type(link, "bpf-map");
}

static void pr_info_bpfmap(char *action, BpfmapFileEntry *bpf)
{
	pr_info("%sbpfmap: id %#08x map_id %#08x map_type %d flags %" PRIx32 "\n", action, bpf->id, bpf->map_id,
		bpf->map_type, bpf->map_flags);
}

struct bpfmap_data_rst *bpfmap_data_hash_table[BPFMAP_DATA_TABLE_SIZE];

static int bpfmap_data_read(struct cr_img *img, struct bpfmap_data_rst *r)
{
	unsigned long bytes = r->bde->keys_bytes + r->bde->values_bytes;
	if (!bytes)
		return 0;

	r->data = mmap(NULL, bytes, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, 0, 0);
	if (r->data == MAP_FAILED) {
		pr_perror("Can't map mem for bpfmap buffers");
		return -1;
	}

	return read_img_buf(img, r->data, bytes);
}

int do_collect_bpfmap_data(struct bpfmap_data_rst *r, ProtobufCMessage *msg, struct cr_img *img,
			   struct bpfmap_data_rst **bpf_hash_table)
{
	int ret;
	int table_index;

	r->bde = pb_msg(msg, BpfmapDataEntry);
	ret = bpfmap_data_read(img, r);
	if (ret < 0)
		return ret;

	table_index = r->bde->map_id & BPFMAP_DATA_HASH_MASK;
	r->next = bpf_hash_table[table_index];
	bpf_hash_table[table_index] = r;

	pr_info("Collected bpfmap data for %#x\n", r->bde->map_id);
	return 0;
}

int restore_bpfmap_data(int map_fd, uint32_t map_id, struct bpfmap_data_rst **bpf_hash_table)
{
	struct bpfmap_data_rst *map_data;
	BpfmapDataEntry *bde;
	void *keys = NULL;
	void *values = NULL;
	unsigned int count;
	LIBBPF_OPTS(bpf_map_batch_opts, opts);

	for (map_data = bpf_hash_table[map_id & BPFMAP_DATA_HASH_MASK]; map_data != NULL; map_data = map_data->next) {
		if (map_data->bde->map_id == map_id)
			break;
	}

	if (!map_data || map_data->bde->count == 0) {
		pr_info("No data for BPF map %#x\n", map_id);
		return 0;
	}

	bde = map_data->bde;
	count = bde->count;

	keys = mmap(NULL, bde->keys_bytes, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, 0, 0);
	if (keys == MAP_FAILED) {
		pr_perror("Can't map memory for BPF map keys");
		goto err;
	}
	memcpy(keys, map_data->data, bde->keys_bytes);

	values = mmap(NULL, bde->values_bytes, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, 0, 0);
	if (values == MAP_FAILED) {
		pr_perror("Can't map memory for BPF map values");
		goto err;
	}
	memcpy(values, map_data->data + bde->keys_bytes, bde->values_bytes);

	if (bpf_map_update_batch(map_fd, keys, values, &count, &opts)) {
		pr_perror("Can't load key-value pairs to BPF map");
		goto err;
	}
	munmap(keys, bde->keys_bytes);
	munmap(values, bde->values_bytes);
	return 0;

err:
	munmap(keys, bde->keys_bytes);
	munmap(values, bde->values_bytes);
	return -1;
}

static int collect_bpfmap_data(void *obj, ProtobufCMessage *msg, struct cr_img *img)
{
	return do_collect_bpfmap_data(obj, msg, img, bpfmap_data_hash_table);
}

struct collect_image_info bpfmap_data_cinfo = {
	.fd_type = CR_FD_BPFMAP_DATA,
	.pb_type = PB_BPFMAP_DATA,
	.priv_size = sizeof(struct bpfmap_data_rst),
	.collect = collect_bpfmap_data,
};

int dump_one_bpfmap_data(BpfmapFileEntry *bpf, int lfd, const struct fd_parms *p)
{
	/*
	 * Linux kernel patch notes for bpf_map_*_batch():
	 *
	 * in_batch/out_batch are opaque values use to communicate between
	 * user/kernel space, in_batch/out_batch must be of key_size length.
	 * To start iterating from the beginning in_batch must be null,
	 * count is the # of key/value elements to retrieve. Note that the 'keys'
	 * buffer must be a buffer of key_size * count size and the 'values' buffer
	 * must be value_size * count, where value_size must be aligned to 8 bytes
	 * by userspace if it's dealing with percpu maps. 'count' will contain the
	 * number of keys/values successfully retrieved. Note that 'count' is an
	 * input/output variable and it can contain a lower value after a call.
	 *
	 * If there's no more entries to retrieve, ENOENT will be returned. If error
	 * is ENOENT, count might be > 0 in case it copied some values but there were
	 * no more entries to retrieve.
	 *
	 * Note that if the return code is an error and not -EFAULT,
	 * count indicates the number of elements successfully processed.
	 */

	struct cr_img *img;
	uint32_t key_size, value_size, max_entries, count;
	void *keys = NULL, *values = NULL;
	void *in_batch = NULL, *out_batch = NULL;
	BpfmapDataEntry bde = BPFMAP_DATA_ENTRY__INIT;
	LIBBPF_OPTS(bpf_map_batch_opts, opts);
	int ret;

	key_size = bpf->key_size;
	value_size = bpf->value_size;
	max_entries = bpf->max_entries;
	count = max_entries;

	keys = mmap(NULL, key_size * max_entries, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, 0, 0);
	if (keys == MAP_FAILED) {
		pr_perror("Can't map memory for BPF map keys");
		goto err;
	}

	values = mmap(NULL, value_size * max_entries, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, 0, 0);
	if (values == MAP_FAILED) {
		pr_perror("Can't map memory for BPF map values");
		goto err;
	}

	out_batch = mmap(NULL, key_size, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, 0, 0);
	if (out_batch == MAP_FAILED) {
		pr_perror("Can't map memory for BPF map out_batch");
		goto err;
	}

	ret = bpf_map_lookup_batch(lfd, in_batch, out_batch, keys, values, &count, &opts);
	if (ret && errno != ENOENT) {
		pr_perror("Can't perform a batch lookup on BPF map");
		goto err;
	}

	img = img_from_set(glob_imgset, CR_FD_BPFMAP_DATA);

	bde.map_id = bpf->map_id;
	bde.keys_bytes = (key_size * count);
	bde.values_bytes = (value_size * count);
	bde.count = count;

	if (pb_write_one(img, &bde, PB_BPFMAP_DATA))
		goto err;

	if (write(img_raw_fd(img), keys, key_size * count) != (key_size * count)) {
		pr_perror("Can't write BPF map's keys");
		goto err;
	}
	if (write(img_raw_fd(img), values, value_size * count) != (value_size * count)) {
		pr_perror("Can't write BPF map's values");
		goto err;
	}

	munmap(keys, key_size * max_entries);
	munmap(values, value_size * max_entries);
	munmap(out_batch, key_size);
	return 0;

err:
	munmap(keys, key_size * max_entries);
	munmap(values, value_size * max_entries);
	munmap(out_batch, key_size);
	return -1;
}

static int dump_one_bpfmap(int lfd, u32 id, const struct fd_parms *p)
{
	BpfmapFileEntry bpf = BPFMAP_FILE_ENTRY__INIT;
	FileEntry fe = FILE_ENTRY__INIT;
	int ret;
	/* If we are using a bigger struct than the kernel knows of,
	 * ensure all the unknown bits are 0 - i.e. new user-space
	 * does not rely on any unknown kernel feature extensions.
	 * https://github.com/torvalds/linux/blob/a1994480/kernel/bpf/syscall.c#L70
	 */
	struct bpf_map_info map_info = {};
	uint32_t info_len = sizeof(struct bpf_map_info);

	if (parse_fdinfo(lfd, FD_TYPES__BPFMAP, &bpf))
		return -1;

	ret = bpf_obj_get_info_by_fd(lfd, &map_info, &info_len);
	if (ret) {
		pr_perror("Could not get BPF map info");
		return -1;
	}

	switch (bpf.map_type) {
	case BPF_MAP_TYPE_HASH:
	case BPF_MAP_TYPE_ARRAY:
		bpf.id = id;
		bpf.flags = p->flags;
		bpf.fown = (FownEntry *)&p->fown;
		bpf.map_name = xstrdup(map_info.name);
		bpf.ifindex = map_info.ifindex;

		fe.type = FD_TYPES__BPFMAP;
		fe.id = bpf.id;
		fe.bpf = &bpf;

		pr_info_bpfmap("Dumping ", &bpf);
		if (pb_write_one(img_from_set(glob_imgset, CR_FD_FILES), &fe, PB_FILE))
			return -1;
		pr_info_bpfmap("Dumping data for ", &bpf);
		ret = dump_one_bpfmap_data(&bpf, lfd, p);
		break;

	default:
		pr_err("CRIU does not currently support dumping BPF map type %u!\n", bpf.map_type);
		ret = -1;
	}

	return ret;
}

const struct fdtype_ops bpfmap_dump_ops = {
	.type = FD_TYPES__BPFMAP,
	.dump = dump_one_bpfmap,
};

static int bpfmap_open(struct file_desc *d, int *new_fd)
{
	struct bpfmap_file_info *info;
	BpfmapFileEntry *bpfe;
	int bpfmap_fd;
#ifdef LEGACY_LIBBPF
	struct bpf_create_map_attr xattr;
#else
	LIBBPF_OPTS(bpf_map_create_opts, bpfmap_opts);
#endif

	info = container_of(d, struct bpfmap_file_info, d);
	bpfe = info->bpfe;

	pr_info_bpfmap("Creating and opening ", bpfe);

#ifdef LEGACY_LIBBPF
	xattr.name = xstrdup(bpfe->map_name);
	xattr.map_type = bpfe->map_type;
	xattr.map_flags = bpfe->map_flags;
	xattr.key_size = bpfe->key_size;
	xattr.value_size = bpfe->value_size;
	xattr.max_entries = bpfe->max_entries;
	xattr.numa_node = 0;
	xattr.btf_fd = 0;
	xattr.btf_key_type_id = 0;
	xattr.btf_value_type_id = 0;
	xattr.map_ifindex = bpfe->ifindex;
	xattr.inner_map_fd = 0;

	bpfmap_fd = bpf_create_map_xattr(&xattr);
#else
	bpfmap_opts.map_flags = bpfe->map_flags;
	bpfmap_opts.map_ifindex = bpfe->ifindex;
	if (bpfe->has_map_extra)
		bpfmap_opts.map_extra = bpfe->map_extra;

	bpfmap_fd = bpf_map_create(bpfe->map_type, bpfe->map_name, bpfe->key_size, bpfe->value_size, bpfe->max_entries,
				   &bpfmap_opts);
#endif

	if (bpfmap_fd < 0) {
		pr_perror("Can't create bpfmap %#08x", bpfe->id);
		return -1;
	}

	if (bpfe->has_map_extra && bpfe->map_extra)
		pr_warn("bpfmap map_extra has non-zero value. This will not be restored.\n");

	if (restore_bpfmap_data(bpfmap_fd, bpfe->map_id, bpfmap_data_hash_table))
		return -1;

	if (bpfe->frozen) {
		if (bpf_map_freeze(bpfmap_fd)) {
			pr_perror("Can't freeze bpfmap %#08x", bpfe->id);
			goto err_close;
		}
	}

	if (rst_file_params(bpfmap_fd, bpfe->fown, bpfe->flags)) {
		pr_perror("Can't restore params on bpfmap %#08x", bpfe->id);
		goto err_close;
	}

	*new_fd = bpfmap_fd;
	return 0;

err_close:
	close(bpfmap_fd);
	return -1;
}

static struct file_desc_ops bpfmap_desc_ops = {
	.type = FD_TYPES__BPFMAP,
	.open = bpfmap_open,
};

static int collect_one_bpfmap(void *obj, ProtobufCMessage *msg, struct cr_img *i)
{
	struct bpfmap_file_info *info = obj;

	info->bpfe = pb_msg(msg, BpfmapFileEntry);
	pr_info_bpfmap("Collected ", info->bpfe);
	return file_desc_add(&info->d, info->bpfe->id, &bpfmap_desc_ops);
}

struct collect_image_info bpfmap_cinfo = {
	.fd_type = CR_FD_BPFMAP_FILE,
	.pb_type = PB_BPFMAP_FILE,
	.priv_size = sizeof(struct bpfmap_file_info),
	.collect = collect_one_bpfmap,
};