In order to implement the devices controller with cgroup v2 we need to
add support for BPF programs, because cgroup v2 doesn't have a devices
controller. This introduces the required helpers wrapping the Linux syscalls.
Signed-off-by: Pavel Hrdina <phrdina(a)redhat.com>
---
include/libvirt/virterror.h | 1 +
src/libvirt_private.syms | 16 +++
src/util/Makefile.inc.am | 2 +
src/util/virbpf.c | 263 ++++++++++++++++++++++++++++++++++++
src/util/virbpf.h | 246 +++++++++++++++++++++++++++++++++
src/util/virerror.c | 1 +
6 files changed, 529 insertions(+)
create mode 100644 src/util/virbpf.c
create mode 100644 src/util/virbpf.h
diff --git a/include/libvirt/virterror.h b/include/libvirt/virterror.h
index fbbe2d5624..d47bed4390 100644
--- a/include/libvirt/virterror.h
+++ b/include/libvirt/virterror.h
@@ -131,6 +131,7 @@ typedef enum {
VIR_FROM_PERF = 65, /* Error from perf */
VIR_FROM_LIBSSH = 66, /* Error from libssh connection transport */
VIR_FROM_RESCTRL = 67, /* Error from resource control */
+ VIR_FROM_BPF = 68, /* Error from BPF code */
# ifdef VIR_ENUM_SENTINELS
VIR_ERR_DOMAIN_LAST
diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms
index c3d6306809..0cff580de2 100644
--- a/src/libvirt_private.syms
+++ b/src/libvirt_private.syms
@@ -1477,6 +1477,22 @@ virBitmapToDataBuf;
virBitmapToString;
+# util/virbpf.h
+virBPFAttachProg;
+virBPFCreateMap;
+virBPFDeleteElem;
+virBPFDetachProg;
+virBPFGetMap;
+virBPFGetMapInfo;
+virBPFGetNextElem;
+virBPFGetProg;
+virBPFGetProgInfo;
+virBPFLoadProg;
+virBPFLookupElem;
+virBPFQueryProg;
+virBPFUpdateElem;
+
+
# util/virbuffer.h
virBufferAdd;
virBufferAddBuffer;
diff --git a/src/util/Makefile.inc.am b/src/util/Makefile.inc.am
index 4295babac3..1fd7ad2d43 100644
--- a/src/util/Makefile.inc.am
+++ b/src/util/Makefile.inc.am
@@ -17,6 +17,8 @@ UTIL_SOURCES = \
util/virauthconfig.h \
util/virbitmap.c \
util/virbitmap.h \
+ util/virbpf.c \
+ util/virbpf.h \
util/virbuffer.c \
util/virbuffer.h \
util/virperf.c \
diff --git a/src/util/virbpf.c b/src/util/virbpf.c
new file mode 100644
index 0000000000..be5ebbc033
--- /dev/null
+++ b/src/util/virbpf.c
@@ -0,0 +1,263 @@
+/*
+ * virbpf.c: methods for eBPF
+ *
+ * Copyright (C) 2018 Red Hat, Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see
+ * <http://www.gnu.org/licenses/>.
+ */
+#include <config.h>
+
+#include <sys/syscall.h>
+
+#include "internal.h"
+
+#include "virbpf.h"
+#include "virerror.h"
+#include "virfile.h"
+#include "virlog.h"
+#include "virstring.h"
+
+VIR_LOG_INIT("util.bpf");
+
+#define VIR_FROM_THIS VIR_FROM_BPF
+
+/**
+ * virBPFCreateMap:
+ * @mapType: value for the map_type field of the request
+ * @keySize: value for the key_size field of the request
+ * @valSize: value for the value_size field of the request
+ * @maxEntries: value for the max_entries field of the request
+ *
+ * Issues a BPF_MAP_CREATE request built from the given parameters.
+ *
+ * Returns the result of the bpf syscall unchanged (per bpf(2) this is
+ * expected to be a new map fd on success and -1 with errno set on failure).
+ */
+int
+virBPFCreateMap(unsigned int mapType,
+                unsigned int keySize,
+                unsigned int valSize,
+                unsigned int maxEntries)
+{
+    union bpf_attr attr;
+
+    /* A designated initializer need not clear the bytes of the union that
+     * lie outside the named members, so zero all of it explicitly. */
+    memset(&attr, 0, sizeof(attr));
+
+    attr.map_type = mapType;
+    attr.key_size = keySize;
+    attr.value_size = valSize;
+    attr.max_entries = maxEntries;
+
+    return syscall(SYS_bpf, BPF_MAP_CREATE, &attr, sizeof(attr));
+}
+
+/* Size in bytes of the log buffer handed to the kernel by virBPFLoadProg() */
+#define LOG_BUF_SIZE (256 * 1024)
+
+/**
+ * virBPFLoadProg:
+ * @insns: array of BPF instructions making up the program
+ * @progType: value for the prog_type field of the request
+ * @insnCnt: value for the insn_cnt field (number of entries in @insns)
+ *
+ * Issues a BPF_PROG_LOAD request for @insns, declaring the license as
+ * "GPL". A buffer of LOG_BUF_SIZE bytes is passed as log_buf with
+ * log_level 1; if the syscall fails, the buffer content is emitted
+ * through VIR_DEBUG.
+ *
+ * Returns the result of the bpf syscall (per bpf(2) expected to be the
+ * program fd on success), or -1 if the log buffer cannot be allocated.
+ */
+int
+virBPFLoadProg(struct bpf_insn *insns,
+               int progType,
+               unsigned int insnCnt)
+{
+    VIR_AUTOFREE(char *) logbuf = NULL;
+    union bpf_attr attr;
+    int progfd = -1;
+
+    if (VIR_ALLOC_N(logbuf, LOG_BUF_SIZE) < 0)
+        return -1;
+
+    /* A designated initializer need not clear the bytes of the union that
+     * lie outside the named members, so zero all of it explicitly. */
+    memset(&attr, 0, sizeof(attr));
+
+    /* Pointers go through 'unsigned long' so that they are zero-extended,
+     * not sign-extended, into the 64-bit fields on 32-bit builds. */
+    attr.prog_type = progType;
+    attr.insn_cnt = insnCnt;
+    attr.insns = (__u64)(unsigned long)insns;
+    attr.license = (__u64)(unsigned long)"GPL";
+    attr.log_buf = (__u64)(unsigned long)logbuf;
+    attr.log_size = LOG_BUF_SIZE;
+    attr.log_level = 1;
+
+    progfd = syscall(SYS_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
+
+    if (progfd < 0)
+        VIR_DEBUG("%s", logbuf);
+
+    return progfd;
+}
+
+/**
+ * virBPFAttachProg:
+ * @progfd: value for the attach_bpf_fd field (the program to attach)
+ * @targetfd: value for the target_fd field
+ * @attachType: value for the attach_type field
+ *
+ * Issues a BPF_PROG_ATTACH request.
+ *
+ * Returns the result of the bpf syscall unchanged.
+ */
+int
+virBPFAttachProg(int progfd,
+                 int targetfd,
+                 int attachType)
+{
+    union bpf_attr attr;
+
+    /* A designated initializer need not clear the bytes of the union that
+     * lie outside the named members, so zero all of it explicitly. */
+    memset(&attr, 0, sizeof(attr));
+
+    attr.target_fd = targetfd;
+    attr.attach_bpf_fd = progfd;
+    attr.attach_type = attachType;
+
+    return syscall(SYS_bpf, BPF_PROG_ATTACH, &attr, sizeof(attr));
+}
+
+/**
+ * virBPFDetachProg:
+ * @progfd: value for the attach_bpf_fd field (the program to detach)
+ * @targetfd: value for the target_fd field
+ * @attachType: value for the attach_type field
+ *
+ * Issues a BPF_PROG_DETACH request.
+ *
+ * Returns the result of the bpf syscall unchanged.
+ */
+int
+virBPFDetachProg(int progfd,
+                 int targetfd,
+                 int attachType)
+{
+    union bpf_attr attr;
+
+    /* A designated initializer need not clear the bytes of the union that
+     * lie outside the named members, so zero all of it explicitly. */
+    memset(&attr, 0, sizeof(attr));
+
+    attr.target_fd = targetfd;
+    attr.attach_bpf_fd = progfd;
+    attr.attach_type = attachType;
+
+    return syscall(SYS_bpf, BPF_PROG_DETACH, &attr, sizeof(attr));
+}
+
+/**
+ * virBPFQueryProg:
+ * @targetfd: value for the query.target_fd field
+ * @maxprogids: value stored in query.prog_cnt before the call
+ *              (NOTE: presumably the capacity of @progids - confirm)
+ * @attachType: value for the query.attach_type field
+ * @progcnt: on success receives query.prog_cnt as left by the kernel
+ * @progids: buffer whose address is passed as query.prog_ids
+ *
+ * Issues a BPF_PROG_QUERY request. @progcnt is written only when the
+ * syscall returns a non-negative value.
+ *
+ * Returns the result of the bpf syscall unchanged.
+ */
+int
+virBPFQueryProg(int targetfd,
+                unsigned int maxprogids,
+                int attachType,
+                unsigned int *progcnt,
+                void *progids)
+{
+    union bpf_attr attr;
+    int rc;
+
+    /* A designated initializer need not clear the bytes of the union that
+     * lie outside the named members, so zero all of it explicitly. */
+    memset(&attr, 0, sizeof(attr));
+
+    attr.query.target_fd = targetfd;
+    attr.query.attach_type = attachType;
+    attr.query.prog_cnt = maxprogids;
+    /* via 'unsigned long' so the pointer is zero-extended on 32-bit builds */
+    attr.query.prog_ids = (__u64)(unsigned long)progids;
+
+    rc = syscall(SYS_bpf, BPF_PROG_QUERY, &attr, sizeof(attr));
+
+    if (rc >= 0)
+        *progcnt = attr.query.prog_cnt;
+
+    return rc;
+}
+
+/**
+ * virBPFGetProg:
+ * @id: value for the prog_id field of the request
+ *
+ * Issues a BPF_PROG_GET_FD_BY_ID request for @id.
+ *
+ * Returns the result of the bpf syscall unchanged.
+ */
+int
+virBPFGetProg(unsigned int id)
+{
+    union bpf_attr attr;
+
+    /* A designated initializer need not clear the bytes of the union that
+     * lie outside the named members, so zero all of it explicitly. */
+    memset(&attr, 0, sizeof(attr));
+
+    attr.prog_id = id;
+
+    return syscall(SYS_bpf, BPF_PROG_GET_FD_BY_ID, &attr, sizeof(attr));
+}
+
+/**
+ * virBPFGetProgInfo:
+ * @progfd: value for the info.bpf_fd field (program to inspect)
+ * @info: structure the kernel fills in
+ * @mapIDs: optional; receives a newly allocated array of map IDs
+ *
+ * Issues BPF_OBJ_GET_INFO_BY_FD for @progfd. If @mapIDs is non-NULL and
+ * the first query reports nr_map_ids > 0, an array of that many entries
+ * is allocated, @info is cleared and pointed at the array, and the query
+ * is repeated so the kernel can fill in the IDs. On success the array is
+ * handed over to *@mapIDs (info->map_ids keeps pointing at it). When no
+ * map IDs are reported, *@mapIDs is left untouched.
+ *
+ * Returns the result of the last bpf syscall, or -1 if the array cannot
+ * be allocated. Note that @info has already been cleared if the second
+ * query fails.
+ */
+int
+virBPFGetProgInfo(int progfd,
+                  struct bpf_prog_info *info,
+                  unsigned int **mapIDs)
+{
+    union bpf_attr attr;
+    int rc;
+
+    /* A designated initializer need not clear the bytes of the union that
+     * lie outside the named members, so zero all of it explicitly. */
+    memset(&attr, 0, sizeof(attr));
+
+    /* Pointers go through 'unsigned long' so that they are zero-extended,
+     * not sign-extended, into the 64-bit fields on 32-bit builds. */
+    attr.info.bpf_fd = progfd;
+    attr.info.info_len = sizeof(struct bpf_prog_info);
+    attr.info.info = (__u64)(unsigned long)info;
+
+    rc = syscall(SYS_bpf, BPF_OBJ_GET_INFO_BY_FD, &attr, sizeof(attr));
+    if (rc < 0)
+        return rc;
+
+    if (mapIDs && info->nr_map_ids > 0) {
+        unsigned int maplen = info->nr_map_ids;
+        VIR_AUTOFREE(unsigned int *) retmapIDs = NULL;
+
+        if (VIR_ALLOC_N(retmapIDs, maplen) < 0)
+            return -1;
+
+        /* Second query: only the map ID array is requested this time. */
+        memset(info, 0, sizeof(struct bpf_prog_info));
+        info->nr_map_ids = maplen;
+        info->map_ids = (__u64)(unsigned long)retmapIDs;
+
+        memset(&attr, 0, sizeof(attr));
+        attr.info.bpf_fd = progfd;
+        attr.info.info_len = sizeof(struct bpf_prog_info);
+        attr.info.info = (__u64)(unsigned long)info;
+
+        rc = syscall(SYS_bpf, BPF_OBJ_GET_INFO_BY_FD, &attr, sizeof(attr));
+        if (rc < 0)
+            return rc;
+
+        VIR_STEAL_PTR(*mapIDs, retmapIDs);
+    }
+
+    return rc;
+}
+
+/**
+ * virBPFGetMap:
+ * @id: value for the map_id field of the request
+ *
+ * Issues a BPF_MAP_GET_FD_BY_ID request for @id.
+ *
+ * Returns the result of the bpf syscall unchanged.
+ */
+int
+virBPFGetMap(unsigned int id)
+{
+    union bpf_attr attr;
+
+    /* A designated initializer need not clear the bytes of the union that
+     * lie outside the named members, so zero all of it explicitly. */
+    memset(&attr, 0, sizeof(attr));
+
+    attr.map_id = id;
+
+    return syscall(SYS_bpf, BPF_MAP_GET_FD_BY_ID, &attr, sizeof(attr));
+}
+
+/**
+ * virBPFGetMapInfo:
+ * @mapfd: value for the info.bpf_fd field (map to inspect)
+ * @info: structure the kernel fills in
+ *
+ * Issues BPF_OBJ_GET_INFO_BY_FD for @mapfd with info_len set to
+ * sizeof(struct bpf_map_info).
+ *
+ * Returns the result of the bpf syscall unchanged.
+ */
+int
+virBPFGetMapInfo(int mapfd,
+                 struct bpf_map_info *info)
+{
+    union bpf_attr attr;
+
+    /* A designated initializer need not clear the bytes of the union that
+     * lie outside the named members, so zero all of it explicitly. */
+    memset(&attr, 0, sizeof(attr));
+
+    attr.info.bpf_fd = mapfd;
+    attr.info.info_len = sizeof(struct bpf_map_info);
+    /* via 'unsigned long' so the pointer is zero-extended on 32-bit builds */
+    attr.info.info = (__u64)(unsigned long)info;
+
+    return syscall(SYS_bpf, BPF_OBJ_GET_INFO_BY_FD, &attr, sizeof(attr));
+}
+
+/**
+ * virBPFLookupElem:
+ * @mapfd: value for the map_fd field
+ * @key: address passed in the key field
+ * @val: address passed in the value field
+ *
+ * Issues a BPF_MAP_LOOKUP_ELEM request.
+ *
+ * Returns the result of the bpf syscall unchanged.
+ */
+int
+virBPFLookupElem(int mapfd,
+                 void *key,
+                 void *val)
+{
+    union bpf_attr attr;
+
+    /* A designated initializer need not clear the bytes of the union that
+     * lie outside the named members, so zero all of it explicitly. */
+    memset(&attr, 0, sizeof(attr));
+
+    /* Pointers go through 'unsigned long' so that they are zero-extended,
+     * not sign-extended, into the 64-bit fields on 32-bit builds. */
+    attr.map_fd = mapfd;
+    attr.key = (__u64)(unsigned long)key;
+    attr.value = (__u64)(unsigned long)val;
+
+    return syscall(SYS_bpf, BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr));
+}
+
+/**
+ * virBPFGetNextElem:
+ * @mapfd: value for the map_fd field
+ * @key: address passed in the key field
+ * @nextKey: address passed in the next_key field
+ *
+ * Issues a BPF_MAP_GET_NEXT_KEY request.
+ *
+ * Returns the result of the bpf syscall unchanged.
+ */
+int
+virBPFGetNextElem(int mapfd,
+                  void *key,
+                  void *nextKey)
+{
+    union bpf_attr attr;
+
+    /* A designated initializer need not clear the bytes of the union that
+     * lie outside the named members, so zero all of it explicitly. */
+    memset(&attr, 0, sizeof(attr));
+
+    /* Pointers go through 'unsigned long' so that they are zero-extended,
+     * not sign-extended, into the 64-bit fields on 32-bit builds. */
+    attr.map_fd = mapfd;
+    attr.key = (__u64)(unsigned long)key;
+    attr.next_key = (__u64)(unsigned long)nextKey;
+
+    return syscall(SYS_bpf, BPF_MAP_GET_NEXT_KEY, &attr, sizeof(attr));
+}
+
+/**
+ * virBPFUpdateElem:
+ * @mapfd: value for the map_fd field
+ * @key: address passed in the key field
+ * @val: address passed in the value field
+ *
+ * Issues a BPF_MAP_UPDATE_ELEM request; no other request field is set,
+ * so everything else (including any flags) is zero.
+ *
+ * Returns the result of the bpf syscall unchanged.
+ */
+int
+virBPFUpdateElem(int mapfd,
+                 void *key,
+                 void *val)
+{
+    union bpf_attr attr;
+
+    /* A designated initializer need not clear the bytes of the union that
+     * lie outside the named members, so zero all of it explicitly. */
+    memset(&attr, 0, sizeof(attr));
+
+    /* Pointers go through 'unsigned long' so that they are zero-extended,
+     * not sign-extended, into the 64-bit fields on 32-bit builds. */
+    attr.map_fd = mapfd;
+    attr.key = (__u64)(unsigned long)key;
+    attr.value = (__u64)(unsigned long)val;
+
+    return syscall(SYS_bpf, BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr));
+}
+
+/**
+ * virBPFDeleteElem:
+ * @mapfd: value for the map_fd field
+ * @key: address passed in the key field
+ *
+ * Issues a BPF_MAP_DELETE_ELEM request.
+ *
+ * Returns the result of the bpf syscall unchanged.
+ */
+int
+virBPFDeleteElem(int mapfd,
+                 void *key)
+{
+    union bpf_attr attr;
+
+    /* A designated initializer need not clear the bytes of the union that
+     * lie outside the named members, so zero all of it explicitly. */
+    memset(&attr, 0, sizeof(attr));
+
+    attr.map_fd = mapfd;
+    /* via 'unsigned long' so the pointer is zero-extended on 32-bit builds */
+    attr.key = (__u64)(unsigned long)key;
+
+    return syscall(SYS_bpf, BPF_MAP_DELETE_ELEM, &attr, sizeof(attr));
+}
diff --git a/src/util/virbpf.h b/src/util/virbpf.h
new file mode 100644
index 0000000000..7085edaacc
--- /dev/null
+++ b/src/util/virbpf.h
@@ -0,0 +1,246 @@
+/*
+ * virbpf.h: methods for eBPF
+ *
+ * Copyright (C) 2018 Red Hat, Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see
+ * <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef LIBVIRT_VIRBPF_H
+# define LIBVIRT_VIRBPF_H
+
+# include <linux/bpf.h>
+
+/*
+ * ALU operation on registers (add, sub, ...): dst_reg = dst_reg 'op' src_reg.
+ * Yields a struct bpf_insn compound literal with opcode
+ * BPF_ALU64 | BPF_OP(op) | BPF_X; off and imm are zero.
+ */
+
+# define VIR_BPF_ALU64_REG(op, dst, src) \
+ ((struct bpf_insn) { \
+ .code = BPF_ALU64 | BPF_OP(op) | BPF_X, \
+ .dst_reg = dst, \
+ .src_reg = src, \
+ .off = 0, \
+ .imm = 0, \
+ })
+
+/*
+ * ALU operation with an immediate (add, sub, ...): dst_reg = dst_reg 'op' imm32.
+ * Yields a struct bpf_insn compound literal with opcode
+ * BPF_ALU64 | BPF_OP(op) | BPF_K; src_reg and off are zero.
+ */
+
+# define VIR_BPF_ALU64_IMM(op, dst, immval) \
+ ((struct bpf_insn) { \
+ .code = BPF_ALU64 | BPF_OP(op) | BPF_K, \
+ .dst_reg = dst, \
+ .src_reg = 0, \
+ .off = 0, \
+ .imm = immval, \
+ })
+
+/*
+ * Register move: dst_reg = src_reg.
+ * Yields a struct bpf_insn compound literal with opcode
+ * BPF_ALU64 | BPF_MOV | BPF_X; off and imm are zero.
+ */
+
+# define VIR_BPF_MOV64_REG(dst, src) \
+ ((struct bpf_insn) { \
+ .code = BPF_ALU64 | BPF_MOV | BPF_X, \
+ .dst_reg = dst, \
+ .src_reg = src, \
+ .off = 0, \
+ .imm = 0, \
+ })
+
+/*
+ * Immediate move: dst_reg = imm32.
+ * Yields a struct bpf_insn compound literal with opcode
+ * BPF_ALU64 | BPF_MOV | BPF_K; src_reg and off are zero.
+ */
+
+# define VIR_BPF_MOV64_IMM(dst, immval) \
+ ((struct bpf_insn) { \
+ .code = BPF_ALU64 | BPF_MOV | BPF_K, \
+ .dst_reg = dst, \
+ .src_reg = 0, \
+ .off = 0, \
+ .imm = immval, \
+ })
+
+/*
+ * Immediate move encoded in the BPF_ALU instruction class instead of
+ * BPF_ALU64 (the 32-bit counterpart of VIR_BPF_MOV64_IMM, as the name
+ * indicates): opcode BPF_ALU | BPF_MOV | BPF_K, src_reg and off zero.
+ */
+# define VIR_BPF_MOV32_IMM(dst, immval) \
+ ((struct bpf_insn) { \
+ .code = BPF_ALU | BPF_MOV | BPF_K, \
+ .dst_reg = dst, \
+ .src_reg = 0, \
+ .off = 0, \
+ .imm = immval, \
+ })
+
+/*
+ * VIR_BPF_LD_IMM64 encodes a single 'load 64-bit immediate' insn with
+ * src_reg set to 0. It forwards to VIR_BPF_LD_IMM64_RAW and therefore
+ * expands to two comma-separated struct bpf_insn values.
+ */
+# define VIR_BPF_LD_IMM64(dst, imm) \
+    VIR_BPF_LD_IMM64_RAW(dst, 0, imm)
+
+/*
+ * Encodes a 64-bit immediate load as two consecutive instructions: the
+ * first carries the opcode (BPF_LD | BPF_DW | BPF_IMM), dst, src and the
+ * low 32 bits of immval; the second is all zero except for imm, which
+ * holds the upper 32 bits. The expansion is two comma-separated
+ * struct bpf_insn values, so it is meant for use inside an initializer
+ * list. immval is parenthesized so that the casts apply to the whole
+ * argument expression; note that it is evaluated twice.
+ */
+# define VIR_BPF_LD_IMM64_RAW(dst, src, immval) \
+    ((struct bpf_insn) { \
+        .code = BPF_LD | BPF_DW | BPF_IMM, \
+        .dst_reg = dst, \
+        .src_reg = src, \
+        .off = 0, \
+        .imm = (__u32)(immval), \
+    }), \
+    ((struct bpf_insn) { \
+        .code = 0, \
+        .dst_reg = 0, \
+        .src_reg = 0, \
+        .off = 0, \
+        .imm = ((__u64)(immval)) >> 32, \
+    })
+
+/*
+ * Value placed in src_reg by VIR_BPF_LD_MAP_FD; may be overridden by
+ * defining it before this point.
+ * NOTE(review): presumably mirrors the kernel's BPF_PSEUDO_MAP_FD - confirm.
+ */
+# ifndef VIR_BPF_PSEUDO_MAP_FD
+# define VIR_BPF_PSEUDO_MAP_FD 1
+# endif
+
+/*
+ * Pseudo VIR_BPF_LD_IMM64 insn used to refer to a process-local map fd:
+ * a VIR_BPF_LD_IMM64_RAW whose src_reg is VIR_BPF_PSEUDO_MAP_FD and whose
+ * immediate is mapfd.
+ */
+# define VIR_BPF_LD_MAP_FD(dst, mapfd) \
+ VIR_BPF_LD_IMM64_RAW(dst, VIR_BPF_PSEUDO_MAP_FD, mapfd)
+
+/*
+ * Memory load: dst_reg = *(uint *) (src_reg + off16).
+ * Opcode is BPF_LDX | BPF_SIZE(size) | BPF_MEM; imm is zero.
+ */
+
+# define VIR_BPF_LDX_MEM(size, dst, src, offval) \
+ ((struct bpf_insn) { \
+ .code = BPF_LDX | BPF_SIZE(size) | BPF_MEM, \
+ .dst_reg = dst, \
+ .src_reg = src, \
+ .off = offval, \
+ .imm = 0, \
+ })
+
+/*
+ * Memory store from a register: *(uint *) (dst_reg + off16) = src_reg.
+ * Opcode is BPF_STX | BPF_SIZE(size) | BPF_MEM; imm is zero.
+ */
+
+# define VIR_BPF_STX_MEM(size, dst, src, offval) \
+ ((struct bpf_insn) { \
+ .code = BPF_STX | BPF_SIZE(size) | BPF_MEM, \
+ .dst_reg = dst, \
+ .src_reg = src, \
+ .off = offval, \
+ .imm = 0, \
+ })
+
+/*
+ * Memory store of an immediate: *(uint *) (dst_reg + off16) = imm32.
+ * Opcode is BPF_ST | BPF_SIZE(size) | BPF_MEM; src_reg is zero.
+ * Note the argument order: the immediate comes before the offset.
+ */
+
+# define VIR_BPF_ST_MEM(size, dst, immval, offval) \
+ ((struct bpf_insn) { \
+ .code = BPF_ST | BPF_SIZE(size) | BPF_MEM, \
+ .dst_reg = dst, \
+ .src_reg = 0, \
+ .off = offval, \
+ .imm = immval, \
+ })
+
+/*
+ * Conditional jump against a register:
+ * if (dst_reg 'op' src_reg) goto pc + off16.
+ * Opcode is BPF_JMP | BPF_OP(op) | BPF_X; imm is zero.
+ */
+
+# define VIR_BPF_JMP_REG(op, dst, src, offval) \
+ ((struct bpf_insn) { \
+ .code = BPF_JMP | BPF_OP(op) | BPF_X, \
+ .dst_reg = dst, \
+ .src_reg = src, \
+ .off = offval, \
+ .imm = 0, \
+ })
+
+/*
+ * Conditional jump against an immediate:
+ * if (dst_reg 'op' imm32) goto pc + off16.
+ * Opcode is BPF_JMP | BPF_OP(op) | BPF_K; src_reg is zero.
+ * Note the argument order: the immediate comes before the offset.
+ */
+
+# define VIR_BPF_JMP_IMM(op, dst, immval, offval) \
+ ((struct bpf_insn) { \
+ .code = BPF_JMP | BPF_OP(op) | BPF_K, \
+ .dst_reg = dst, \
+ .src_reg = 0, \
+ .off = offval, \
+ .imm = immval, \
+ })
+
+/*
+ * Call eBPF function: opcode BPF_JMP | BPF_CALL with func stored in imm;
+ * all other fields are zero.
+ */
+
+# define VIR_BPF_CALL_INSN(func) \
+ ((struct bpf_insn) { \
+ .code = BPF_JMP | BPF_CALL, \
+ .dst_reg = 0, \
+ .src_reg = 0, \
+ .off = 0, \
+ .imm = func, \
+ })
+
+/*
+ * Program exit: opcode BPF_JMP | BPF_EXIT; all other fields are zero.
+ */
+
+# define VIR_BPF_EXIT_INSN() \
+ ((struct bpf_insn) { \
+ .code = BPF_JMP | BPF_EXIT, \
+ .dst_reg = 0, \
+ .src_reg = 0, \
+ .off = 0, \
+ .imm = 0, \
+ })
+
+/*
+ * Thin wrappers around the bpf syscall. Each one fills a union bpf_attr
+ * and returns the syscall result; virBPFLoadProg and virBPFGetProgInfo
+ * additionally return -1 when an internal allocation fails.
+ */
+
+/* BPF_MAP_CREATE */
+int
+virBPFCreateMap(unsigned int mapType,
+ unsigned int keySize,
+ unsigned int valSize,
+ unsigned int maxEntries);
+
+/* BPF_OBJ_GET_INFO_BY_FD for a map fd; the kernel fills @info */
+int
+virBPFGetMapInfo(int mapfd,
+ struct bpf_map_info *info);
+
+/* BPF_PROG_LOAD; @insnCnt is the number of entries in @insns */
+int
+virBPFLoadProg(struct bpf_insn *insns,
+ int progType,
+ unsigned int insnCnt);
+
+/* BPF_PROG_ATTACH */
+int
+virBPFAttachProg(int progfd,
+ int targetfd,
+ int attachType);
+
+/* BPF_PROG_DETACH */
+int
+virBPFDetachProg(int progfd,
+ int targetfd,
+ int attachType);
+
+/* BPF_PROG_QUERY; @progcnt is written only when the call succeeds */
+int
+virBPFQueryProg(int targetfd,
+ unsigned int maxprogids,
+ int attachType,
+ unsigned int *progcnt,
+ void *progids);
+
+/* BPF_PROG_GET_FD_BY_ID */
+int
+virBPFGetProg(unsigned int id);
+
+/* BPF_OBJ_GET_INFO_BY_FD for a program fd; @mapIDs may be NULL, otherwise
+ * it receives an allocated array when the program reports map IDs */
+int
+virBPFGetProgInfo(int progfd,
+ struct bpf_prog_info *info,
+ unsigned int **mapIDs);
+
+/* BPF_MAP_GET_FD_BY_ID */
+int
+virBPFGetMap(unsigned int id);
+
+/* BPF_MAP_LOOKUP_ELEM */
+int
+virBPFLookupElem(int mapfd,
+ void *key,
+ void *val);
+
+/* BPF_MAP_GET_NEXT_KEY */
+int
+virBPFGetNextElem(int mapfd,
+ void *key,
+ void *nextKey);
+
+/* BPF_MAP_UPDATE_ELEM */
+int
+virBPFUpdateElem(int mapfd,
+ void *key,
+ void *val);
+
+/* BPF_MAP_DELETE_ELEM */
+int
+virBPFDeleteElem(int mapfd,
+ void *key);
+
+#endif /* LIBVIRT_VIRBPF_H */
diff --git a/src/util/virerror.c b/src/util/virerror.c
index 61b47d2be0..a40076f8ec 100644
--- a/src/util/virerror.c
+++ b/src/util/virerror.c
@@ -138,6 +138,7 @@ VIR_ENUM_IMPL(virErrorDomain, VIR_ERR_DOMAIN_LAST,
"Perf", /* 65 */
"Libssh transport layer",
"Resource control",
+ "BPF",
)
--
2.20.1