When qemu_open is passed a filename of the "/dev/fdset/nnn"
format (where nnn is the fdset ID), an fd with matching access
mode flags will be searched for within the specified monitor
fd set. If the fd is found, a dup of the fd will be returned
from qemu_open.
Each fd set has a reference count. The purpose of the reference
count is to determine if an fd set contains file descriptors that
have open dup() references that have not yet been closed. It is
incremented on qemu_open and decremented on qemu_close. It is
not until the refcount is zero that file descriptors in an fd set
can be closed. If an fd set has dup() references open, then we
must keep the other fds in the fd set open in case a reopen
of the file occurs that requires an fd with a different access
mode.
Signed-off-by: Corey Bryant <coreyb(a)linux.vnet.ibm.com>
v2:
-Get rid of file_open and move dup code to qemu_open
(kwolf(a)redhat.com)
-Use strtol wrapper instead of atoi (kwolf(a)redhat.com)
v3:
-Add note about fd leakage (eblake(a)redhat.com)
v4:
-Moved patch to be later in series (lcapitulino(a)redhat.com)
-Update qemu_open to check access mode flags and set flags that
can be set (eblake(a)redhat.com, kwolf(a)redhat.com)
v5:
-This patch was overhauled quite a bit in this version, with
the addition of fd set and refcount support.
-Use qemu_set_cloexec() on dup'd fd (eblake(a)redhat.com)
-Modify flags set by fcntl on dup'd fd (eblake(a)redhat.com)
-Reduce syscalls when setting flags for dup'd fd (eblake(a)redhat.com)
-Fix O_RDWR, O_RDONLY, O_WRONLY checks (eblake(a)redhat.com)
v6:
-Pass only the fd to qemu_close() and keep track of dup fds per fd
set. (kwolf(a)redhat.com, eblake(a)redhat.com)
-Handle refcount incr/decr in new dup_fd_add/remove fd functions.
-Use qemu_set_cloexec() appropriately in qemu_dup() (kwolf(a)redhat.com)
-Simplify setting of setfl_flags in qemu_dup() (kwolf(a)redhat.com)
-Add preprocessor checks for F_DUPFD_CLOEXEC (eblake(a)redhat.com)
-Simplify flag checking in monitor_fdset_get_fd() (kwolf(a)redhat.com)
---
cutils.c | 5 +++
monitor.c | 100 +++++++++++++++++++++++++++++++++++++++++++++++++++
monitor.h | 5 +++
osdep.c | 112 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
qemu-common.h | 1 +
qemu-tool.c | 21 +++++++++++
6 files changed, 244 insertions(+)
diff --git a/cutils.c b/cutils.c
index 9d4c570..8b0d2bb 100644
--- a/cutils.c
+++ b/cutils.c
@@ -382,3 +382,8 @@ int qemu_parse_fd(const char *param)
}
return fd;
}
+
+/*
+ * Parse the fd set id given in @param.
+ *
+ * The string is handed unchanged to qemu_parse_fd() and its result is
+ * returned as is.
+ */
+int qemu_parse_fdset(const char *param)
+{
+    int fdset_id = qemu_parse_fd(param);
+
+    return fdset_id;
+}
diff --git a/monitor.c b/monitor.c
index a46ef8d..66b863f 100644
--- a/monitor.c
+++ b/monitor.c
@@ -155,6 +155,7 @@ struct mon_fdset_t {
int refcount;
bool in_use;
QLIST_HEAD(, mon_fdset_fd_t) fds;
+ QLIST_HEAD(, mon_fdset_fd_t) dup_fds;
QLIST_ENTRY(mon_fdset_t) next;
};
@@ -2572,6 +2573,105 @@ static void monitor_fdsets_set_in_use(Monitor *mon, bool in_use)
}
}
+/*
+ * Search fd set @fdset_id of @mon for a descriptor whose access mode
+ * equals the access mode requested in @flags (the O_ACCMODE bits).
+ *
+ * Returns the matching fd as stored in the set (not a duplicate), or -1
+ * with errno set: ENOENT if @mon is NULL or no set has this id, EACCES if
+ * the set holds no usable fd with that access mode, or the fcntl() error
+ * if querying a descriptor's flags fails.
+ */
+int monitor_fdset_get_fd(Monitor *mon, int64_t fdset_id, int flags)
+{
+    const int wanted_mode = flags & O_ACCMODE;
+    mon_fdset_t *set;
+    mon_fdset_fd_t *entry;
+
+    if (mon == NULL) {
+        errno = ENOENT;
+        return -1;
+    }
+
+    QLIST_FOREACH(set, &mon->fdsets, next) {
+        if (set->id == fdset_id) {
+            QLIST_FOREACH(entry, &set->fds, next) {
+                int entry_flags;
+
+                /* Entries already marked as removed are never handed out */
+                if (entry->removed) {
+                    continue;
+                }
+
+                entry_flags = fcntl(entry->fd, F_GETFL);
+                if (entry_flags == -1) {
+                    return -1;
+                }
+
+                if ((entry_flags & O_ACCMODE) == wanted_mode) {
+                    return entry->fd;
+                }
+            }
+
+            /* The set exists but offers nothing with this access mode */
+            errno = EACCES;
+            return -1;
+        }
+    }
+
+    errno = ENOENT;
+    return -1;
+}
+
+/*
+ * Record @dup_fd as a duplicate handed out from fd set @fdset_id and take
+ * a reference on that set.
+ *
+ * Returns 0 on success, or -1 if @mon is NULL, no set has this id, or the
+ * set is already tracking @dup_fd.
+ */
+int monitor_fdset_dup_fd_add(Monitor *mon, int64_t fdset_id, int dup_fd)
+{
+    mon_fdset_t *set;
+    mon_fdset_fd_t *tracked;
+
+    if (mon == NULL) {
+        return -1;
+    }
+
+    QLIST_FOREACH(set, &mon->fdsets, next) {
+        if (set->id == fdset_id) {
+            /* Refuse to track the same descriptor twice in one set */
+            QLIST_FOREACH(tracked, &set->dup_fds, next) {
+                if (tracked->fd == dup_fd) {
+                    return -1;
+                }
+            }
+
+            tracked = g_malloc0(sizeof(*tracked));
+            tracked->fd = dup_fd;
+            QLIST_INSERT_HEAD(&set->dup_fds, tracked, next);
+            set->refcount++;
+            return 0;
+        }
+    }
+
+    return -1;
+}
+
+/*
+ * Find the fd set of @mon that tracks @dup_fd as a dup'd descriptor.
+ *
+ * When @remove is true the tracking entry is also unlinked and freed, the
+ * set's refcount is dropped, and monitor_fdset_cleanup() is run on the set
+ * once the refcount reaches zero.
+ *
+ * Returns the id of the set that tracked @dup_fd, or -1 if @mon is NULL or
+ * no set tracks it.
+ */
+static int monitor_fdset_dup_fd_find_remove(Monitor *mon, int dup_fd,
+                                            bool remove)
+{
+    mon_fdset_t *mon_fdset;
+    mon_fdset_fd_t *mon_fdset_fd_dup;
+
+    if (!mon) {
+        return -1;
+    }
+
+    QLIST_FOREACH(mon_fdset, &mon->fdsets, next) {
+        QLIST_FOREACH(mon_fdset_fd_dup, &mon_fdset->dup_fds, next) {
+            int fdset_id;
+
+            if (mon_fdset_fd_dup->fd != dup_fd) {
+                continue;
+            }
+
+            /*
+             * Read the id before any cleanup: monitor_fdset_cleanup() is
+             * defined outside this hunk and may release the set, so the
+             * set must not be dereferenced after it has run.
+             */
+            fdset_id = mon_fdset->id;
+
+            if (remove) {
+                QLIST_REMOVE(mon_fdset_fd_dup, next);
+                /* Allocated in monitor_fdset_dup_fd_add(); free it here */
+                g_free(mon_fdset_fd_dup);
+                mon_fdset->refcount--;
+                if (mon_fdset->refcount == 0) {
+                    monitor_fdset_cleanup(mon_fdset);
+                }
+            }
+            return fdset_id;
+        }
+    }
+    return -1;
+}
+
+/*
+ * Returns the id of the fd set tracking @dup_fd, or -1 if there is none.
+ * The set is left untouched.
+ */
+int monitor_fdset_dup_fd_find(Monitor *mon, int dup_fd)
+{
+    return monitor_fdset_dup_fd_find_remove(mon, dup_fd, false);
+}
+
+/*
+ * Stops tracking @dup_fd and drops its reference on the owning fd set.
+ * Returns the id of that set, or -1 if @dup_fd was not tracked.
+ */
+int monitor_fdset_dup_fd_remove(Monitor *mon, int dup_fd)
+{
+    return monitor_fdset_dup_fd_find_remove(mon, dup_fd, true);
+}
+
/* mon_cmds and info_cmds would be sorted at runtime */
static mon_cmd_t mon_cmds[] = {
#include "hmp-commands.h"
diff --git a/monitor.h b/monitor.h
index 5f4de1b..afab88a 100644
--- a/monitor.h
+++ b/monitor.h
@@ -86,4 +86,9 @@ int qmp_qom_set(Monitor *mon, const QDict *qdict, QObject **ret);
int qmp_qom_get(Monitor *mon, const QDict *qdict, QObject **ret);
+int monitor_fdset_get_fd(Monitor *mon, int64_t fdset_id, int flags); /* fd from the set whose access mode matches flags, or -1 */
+int monitor_fdset_dup_fd_add(Monitor *mon, int64_t fdset_id, int dup_fd); /* track dup_fd in the set; 0 on success, -1 on failure */
+int monitor_fdset_dup_fd_remove(Monitor *mon, int dup_fd); /* stop tracking dup_fd; id of its set, or -1 */
+int monitor_fdset_dup_fd_find(Monitor *mon, int dup_fd); /* id of the set tracking dup_fd, or -1 */
+
#endif /* !MONITOR_H */
diff --git a/osdep.c b/osdep.c
index 7b09dff..4a4e7e8 100644
--- a/osdep.c
+++ b/osdep.c
@@ -47,6 +47,7 @@ extern int madvise(caddr_t, size_t, int);
#include "qemu-common.h"
#include "trace.h"
#include "qemu_socket.h"
+#include "monitor.h"
static const char *qemu_version = QEMU_VERSION;
@@ -75,6 +76,69 @@ int qemu_madvise(void *addr, size_t len, int advice)
#endif
}
+/*
+ * Duplicate @fd and bring the duplicate's status flags in line with @flags.
+ *
+ * The duplicate is made close-on-exec when @flags contains O_CLOEXEC.
+ * O_SYNC has to agree between @flags and the duplicate already (EINVAL
+ * otherwise); the flags listed in fcntl_settable are then set or cleared
+ * to match @flags. The file is truncated to length zero when @flags
+ * contains O_TRUNC or both O_CREAT and O_EXCL.
+ *
+ * Returns the new descriptor, or -1 with errno set. On failure the
+ * duplicate, if one was created, is closed again.
+ */
+static int qemu_dup(int fd, int flags)
+{
+    const int fcntl_settable =
+        O_APPEND | O_ASYNC | O_DIRECT | O_NOATIME | O_NONBLOCK;
+    const int create_excl = O_CREAT | O_EXCL;
+    int newfd;
+    int cur_flags;
+    int saved_errno;
+
+#ifdef F_DUPFD_CLOEXEC
+    if (flags & O_CLOEXEC) {
+        newfd = fcntl(fd, F_DUPFD_CLOEXEC, 0);
+    } else {
+        newfd = dup(fd);
+    }
+#else
+    newfd = dup(fd);
+    if (newfd != -1 && (flags & O_CLOEXEC)) {
+        qemu_set_cloexec(newfd);
+    }
+#endif
+
+    /* Nothing to clean up yet; errno is the one left by dup()/fcntl() */
+    if (newfd == -1) {
+        return -1;
+    }
+
+    cur_flags = fcntl(newfd, F_GETFL);
+    if (cur_flags == -1) {
+        goto fail;
+    }
+
+    /* Any O_SYNC bit that differs between request and duplicate is fatal */
+    if ((flags ^ cur_flags) & O_SYNC) {
+        errno = EINVAL;
+        goto fail;
+    }
+
+    /* Replace the fcntl-settable bits with the requested ones */
+    cur_flags = (cur_flags & ~fcntl_settable) | (flags & fcntl_settable);
+    if (fcntl(newfd, F_SETFL, cur_flags) == -1) {
+        goto fail;
+    }
+
+    /* Truncate the file in the cases that open() would truncate it */
+    if ((flags & O_TRUNC) || (flags & create_excl) == create_excl) {
+        if (ftruncate(newfd, 0) == -1) {
+            goto fail;
+        }
+    }
+
+    return newfd;
+
+fail:
+    /* close() may overwrite errno; report the original failure */
+    saved_errno = errno;
+    close(newfd);
+    errno = saved_errno;
+    return -1;
+}
/*
* Opens a file with FD_CLOEXEC set
@@ -84,6 +148,39 @@ int qemu_open(const char *name, int flags, ...)
int ret;
int mode = 0;
+#ifndef _WIN32
+    const char *fdset_id_str;
+
+    /*
+     * A name of the form "/dev/fdset/<id>" is not opened as a path: it is
+     * served by dup'ing an fd with a matching access mode out of monitor
+     * fd set <id>.
+     */
+    if (strstart(name, "/dev/fdset/", &fdset_id_str)) {
+        int64_t fdset_id;
+        int fd, dupfd;
+
+        fdset_id = qemu_parse_fdset(fdset_id_str);
+        if (fdset_id == -1) {
+            errno = EINVAL;
+            return -1;
+        }
+
+        fd = monitor_fdset_get_fd(default_mon, fdset_id, flags);
+        if (fd == -1) {
+            return -1;
+        }
+
+        /* Test the dup result itself; fd is known to be valid here */
+        dupfd = qemu_dup(fd, flags);
+        if (dupfd == -1) {
+            return -1;
+        }
+
+        ret = monitor_fdset_dup_fd_add(default_mon, fdset_id, dupfd);
+        if (ret == -1) {
+            /*
+             * The dup could not be registered with its fd set. Close it
+             * instead of leaking it, and report an error, because
+             * monitor_fdset_dup_fd_add() does not set errno itself.
+             */
+            close(dupfd);
+            errno = EINVAL;
+            return -1;
+        }
+
+        return dupfd;
+    }
+#endif
+
if (flags & O_CREAT) {
va_list ap;
@@ -106,6 +203,21 @@ int qemu_open(const char *name, int flags, ...)
int qemu_close(int fd)
{
+    /*
+     * An fd that is tracked as a dup of a monitor fd set entry must also be
+     * dropped from that set, but only once close() has actually succeeded.
+     */
+    if (monitor_fdset_dup_fd_find(default_mon, fd) != -1) {
+        int rc = close(fd);
+
+        if (rc == 0) {
+            monitor_fdset_dup_fd_remove(default_mon, fd);
+        }
+        return rc;
+    }
+
return close(fd);
}
diff --git a/qemu-common.h b/qemu-common.h
index 8c4638b..ee114aa 100644
--- a/qemu-common.h
+++ b/qemu-common.h
@@ -148,6 +148,7 @@ int qemu_fls(int i);
int qemu_fdatasync(int fd);
int fcntl_setfl(int fd, int flag);
int qemu_parse_fd(const char *param);
+int qemu_parse_fdset(const char *param);
/*
* strtosz() suffixes used to specify the default treatment of an
diff --git a/qemu-tool.c b/qemu-tool.c
index 318c5fc..269f3b7 100644
--- a/qemu-tool.c
+++ b/qemu-tool.c
@@ -31,6 +31,7 @@ struct QEMUBH
};
Monitor *cur_mon;
+Monitor *default_mon;
int monitor_cur_is_qmp(void)
{
@@ -57,6 +58,26 @@ void monitor_protocol_event(MonitorEvent event, QObject *data)
{
}
+/*
+ * Stub versions of the monitor fd set helpers. Every one of them reports
+ * failure unconditionally.
+ */
+int monitor_fdset_get_fd(Monitor *mon, int64_t fdset_id, int flags)
+{
+    /*
+     * qemu_open() returns this failure straight to its caller, who then
+     * reads errno. Report the same error monitor.c uses when it is called
+     * without a monitor.
+     */
+    errno = ENOENT;
+    return -1;
+}
+
+/* Stub: no fd set exists, so nothing can be tracked. */
+int monitor_fdset_dup_fd_add(Monitor *mon, int64_t fdset_id, int dup_fd)
+{
+    return -1;
+}
+
+/* Stub: no fd is ever tracked, so there is nothing to remove. */
+int monitor_fdset_dup_fd_remove(Monitor *mon, int dup_fd)
+{
+    return -1;
+}
+
+/* Stub: always reports that @dup_fd is not tracked by any fd set. */
+int monitor_fdset_dup_fd_find(Monitor *mon, int dup_fd)
+{
+    return -1;
+}
+
int64_t cpu_get_clock(void)
{
return qemu_get_clock_ns(rt_clock);
--
1.7.10.4