[PATCH v2 0/5] Initial network support in ch driver.

v2: * Refactor virSocketRecvHttpResponse to return responses without parsing http responses. * Use errno to report errors in virsocket.c * Address WIN32 build failure in virsocket.c * Fix code indentations Praveen K Paladugu (5): conf: Drop unused parameter hypervisor: Move domain interface mgmt methods util: Add util methods required by ch networking ch: Introduce version based cap for network support ch: Enable ETHERNET Network mode support po/POTFILES | 3 + src/ch/ch_capabilities.c | 9 + src/ch/ch_capabilities.h | 1 + src/ch/ch_conf.h | 4 + src/ch/ch_domain.c | 41 +++ src/ch/ch_domain.h | 3 + src/ch/ch_interface.c | 100 +++++++ src/ch/ch_interface.h | 35 +++ src/ch/ch_monitor.c | 213 +++++--------- src/ch/ch_monitor.h | 7 +- src/ch/ch_process.c | 166 ++++++++++- src/ch/meson.build | 2 + src/conf/domain_conf.c | 1 - src/conf/domain_conf.h | 3 +- src/hypervisor/domain_interface.c | 457 ++++++++++++++++++++++++++++++ src/hypervisor/domain_interface.h | 45 +++ src/hypervisor/meson.build | 1 + src/libvirt_private.syms | 11 + src/libxl/libxl_domain.c | 2 +- src/libxl/libxl_driver.c | 4 +- src/lxc/lxc_driver.c | 2 +- src/lxc/lxc_process.c | 4 +- src/qemu/qemu_command.c | 8 +- src/qemu/qemu_hotplug.c | 15 +- src/qemu/qemu_interface.c | 339 +--------------------- src/qemu/qemu_interface.h | 11 - src/qemu/qemu_process.c | 72 +---- src/util/virsocket.c | 116 ++++++++ src/util/virsocket.h | 3 + 29 files changed, 1107 insertions(+), 571 deletions(-) create mode 100644 src/ch/ch_interface.c create mode 100644 src/ch/ch_interface.h create mode 100644 src/hypervisor/domain_interface.c create mode 100644 src/hypervisor/domain_interface.h -- 2.43.0

Drop unused parameter from virDomainNetReleaseActualDevice method. Signed-off-by: Praveen K Paladugu <prapal@linux.microsoft.com> --- src/conf/domain_conf.c | 1 - src/conf/domain_conf.h | 3 +-- src/libxl/libxl_domain.c | 2 +- src/libxl/libxl_driver.c | 4 ++-- src/lxc/lxc_driver.c | 2 +- src/lxc/lxc_process.c | 4 ++-- src/qemu/qemu_hotplug.c | 10 +++++----- src/qemu/qemu_process.c | 2 +- 8 files changed, 13 insertions(+), 15 deletions(-) diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c index 6211d2a51b..9b42ac3e38 100644 --- a/src/conf/domain_conf.c +++ b/src/conf/domain_conf.c @@ -30276,7 +30276,6 @@ virDomainNetNotifyActualDevice(virConnectPtr conn, int virDomainNetReleaseActualDevice(virConnectPtr conn, - virDomainDef *dom G_GNUC_UNUSED, virDomainNetDef *iface) { virNetworkPtr net = NULL; diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h index d176bda5f8..95d8fc379d 100644 --- a/src/conf/domain_conf.h +++ b/src/conf/domain_conf.h @@ -4406,9 +4406,8 @@ virDomainNetNotifyActualDevice(virConnectPtr conn, int virDomainNetReleaseActualDevice(virConnectPtr conn, - virDomainDef *dom, virDomainNetDef *iface) - ATTRIBUTE_NONNULL(1) ATTRIBUTE_NONNULL(2); + ATTRIBUTE_NONNULL(1); int virDomainNetBandwidthUpdate(virDomainNetDef *iface, diff --git a/src/libxl/libxl_domain.c b/src/libxl/libxl_domain.c index 670fd2881e..ad2ad1ce0e 100644 --- a/src/libxl/libxl_domain.c +++ b/src/libxl/libxl_domain.c @@ -749,7 +749,7 @@ libxlNetworkUnwindDevices(virDomainDef *def) g_autoptr(virConnect) conn = virGetConnectNetwork(); if (conn) - virDomainNetReleaseActualDevice(conn, def, net); + virDomainNetReleaseActualDevice(conn, net); else VIR_WARN("Unable to release network device '%s'", NULLSTR(net->ifname)); } diff --git a/src/libxl/libxl_driver.c b/src/libxl/libxl_driver.c index 6c843b9054..e42a3dc0a9 100644 --- a/src/libxl/libxl_driver.c +++ b/src/libxl/libxl_driver.c @@ -3440,7 +3440,7 @@ libxlDomainAttachNetDevice(libxlDriverPrivate *driver, } else { virDomainNetRemoveHostdev(vm->def, net); if (net->type == VIR_DOMAIN_NET_TYPE_NETWORK && conn) - virDomainNetReleaseActualDevice(conn, vm->def, net); + virDomainNetReleaseActualDevice(conn, net); } virObjectUnref(cfg); virErrorRestore(&save_err); @@ -3911,7 +3911,7 @@ libxlDomainDetachNetDevice(libxlDriverPrivate *driver, if (detach->type == VIR_DOMAIN_NET_TYPE_NETWORK) { g_autoptr(virConnect) conn = virGetConnectNetwork(); if (conn) - virDomainNetReleaseActualDevice(conn, vm->def, detach); + virDomainNetReleaseActualDevice(conn, detach); else VIR_WARN("Unable to release network device '%s'", NULLSTR(detach->ifname)); } diff --git a/src/lxc/lxc_driver.c b/src/lxc/lxc_driver.c index c281998652..39992bdf96 100644 --- a/src/lxc/lxc_driver.c +++ b/src/lxc/lxc_driver.c @@ -4110,7 +4110,7 @@ lxcDomainDetachDeviceNetLive(virDomainObj *vm, if (detach->type == VIR_DOMAIN_NET_TYPE_NETWORK) { g_autoptr(virConnect) conn = virGetConnectNetwork(); if (conn) - virDomainNetReleaseActualDevice(conn, vm->def, detach); + virDomainNetReleaseActualDevice(conn, detach); else VIR_WARN("Unable to release network device '%s'", NULLSTR(detach->ifname)); } diff --git a/src/lxc/lxc_process.c b/src/lxc/lxc_process.c index 20cd3a3d57..bfdcefd01b 100644 --- a/src/lxc/lxc_process.c +++ b/src/lxc/lxc_process.c @@ -221,7 +221,7 @@ static void virLXCProcessCleanup(virLXCDriver *driver, } if (iface->type == VIR_DOMAIN_NET_TYPE_NETWORK) { if (conn || (conn = virGetConnectNetwork())) - virDomainNetReleaseActualDevice(conn, vm->def, iface); + virDomainNetReleaseActualDevice(conn, iface); else VIR_WARN("Unable to release network device '%s'", NULLSTR(iface->ifname)); } @@ -643,7 +643,7 @@ virLXCProcessSetupInterfaces(virLXCDriver *driver, virDomainNetGetActualBridgeName(iface), iface->ifname)); if (iface->type == VIR_DOMAIN_NET_TYPE_NETWORK && netconn) - virDomainNetReleaseActualDevice(netconn, def, iface); + virDomainNetReleaseActualDevice(netconn, iface); } virErrorRestore(&save_err); } diff --git a/src/qemu/qemu_hotplug.c b/src/qemu/qemu_hotplug.c index 0e45bd53e1..bf1ac241ff 100644 --- a/src/qemu/qemu_hotplug.c +++ b/src/qemu/qemu_hotplug.c @@ -1435,7 +1435,7 @@ qemuDomainAttachNetDevice(virQEMUDriver *driver, if (net->type == VIR_DOMAIN_NET_TYPE_NETWORK) { if (conn) - virDomainNetReleaseActualDevice(conn, vm->def, net); + virDomainNetReleaseActualDevice(conn, net); else VIR_WARN("Unable to release network device '%s'", NULLSTR(net->ifname)); } @@ -4162,7 +4162,7 @@ qemuDomainChangeNet(virQEMUDriver *driver, * no need to change the pointer in the hostdev structure */ if (olddev->type == VIR_DOMAIN_NET_TYPE_NETWORK) { if (conn || (conn = virGetConnectNetwork())) - virDomainNetReleaseActualDevice(conn, vm->def, olddev); + virDomainNetReleaseActualDevice(conn, olddev); else VIR_WARN("Unable to release network device '%s'", NULLSTR(olddev->ifname)); } @@ -4198,7 +4198,7 @@ qemuDomainChangeNet(virQEMUDriver *driver, * replace the entire device object. */ if (newdev && newdev->type == VIR_DOMAIN_NET_TYPE_NETWORK && conn) - virDomainNetReleaseActualDevice(conn, vm->def, newdev); + virDomainNetReleaseActualDevice(conn, newdev); virErrorRestore(&save_err); return ret; @@ -4778,7 +4778,7 @@ qemuDomainRemoveHostDevice(virQEMUDriver *driver, if (net->type == VIR_DOMAIN_NET_TYPE_NETWORK) { g_autoptr(virConnect) conn = virGetConnectNetwork(); if (conn) - virDomainNetReleaseActualDevice(conn, vm->def, net); + virDomainNetReleaseActualDevice(conn, net); else VIR_WARN("Unable to release network device '%s'", NULLSTR(net->ifname)); } @@ -4896,7 +4896,7 @@ qemuDomainRemoveNetDevice(virQEMUDriver *driver, if (net->type == VIR_DOMAIN_NET_TYPE_NETWORK) { g_autoptr(virConnect) conn = virGetConnectNetwork(); if (conn) - virDomainNetReleaseActualDevice(conn, vm->def, net); + virDomainNetReleaseActualDevice(conn, net); else VIR_WARN("Unable to release network device '%s'", NULLSTR(net->ifname)); } else if (net->type == VIR_DOMAIN_NET_TYPE_ETHERNET) { diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c index 3563ad215c..3abd3ad23a 100644 --- a/src/qemu/qemu_process.c +++ b/src/qemu/qemu_process.c @@ -8579,7 +8579,7 @@ void qemuProcessStop(virQEMUDriver *driver, virDomainNetRemoveHostdev(def, net); if (net->type == VIR_DOMAIN_NET_TYPE_NETWORK) { if (conn || (conn = virGetConnectNetwork())) - virDomainNetReleaseActualDevice(conn, vm->def, net); + virDomainNetReleaseActualDevice(conn, net); else VIR_WARN("Unable to release network device '%s'", NULLSTR(net->ifname)); } -- 2.43.0

Move domain interface management methods from qemu to hypervisor. This refactoring allows the domain management methods to be shared between CH and qemu drivers. This commit does not introduce any functional changes. Signed-off-by: Praveen K Paladugu <prapal@linux.microsoft.com> --- po/POTFILES | 1 + src/hypervisor/domain_interface.c | 457 ++++++++++++++++++++++++++++++ src/hypervisor/domain_interface.h | 45 +++ src/hypervisor/meson.build | 1 + src/libvirt_private.syms | 9 + src/qemu/qemu_command.c | 8 +- src/qemu/qemu_hotplug.c | 5 +- src/qemu/qemu_interface.c | 339 +--------------------- src/qemu/qemu_interface.h | 11 - src/qemu/qemu_process.c | 72 +---- 10 files changed, 533 insertions(+), 415 deletions(-) create mode 100644 src/hypervisor/domain_interface.c create mode 100644 src/hypervisor/domain_interface.h diff --git a/po/POTFILES b/po/POTFILES index 3a51aea5cb..023c041f61 100644 --- a/po/POTFILES +++ b/po/POTFILES @@ -92,6 +92,7 @@ src/hyperv/hyperv_util.c src/hyperv/hyperv_wmi.c src/hypervisor/domain_cgroup.c src/hypervisor/domain_driver.c +src/hypervisor/domain_interface.c src/hypervisor/virhostdev.c src/interface/interface_backend_netcf.c src/interface/interface_backend_udev.c diff --git a/src/hypervisor/domain_interface.c b/src/hypervisor/domain_interface.c new file mode 100644 index 0000000000..853814fe78 --- /dev/null +++ b/src/hypervisor/domain_interface.c @@ -0,0 +1,457 @@ +/* + * Copyright (C) 2015-2016 Red Hat, Inc. + * Copyright IBM Corp. 2014 + * + * domain_interface.c: methods to manage guest/domain interfaces + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library. If not, see + * <http://www.gnu.org/licenses/>. + */ + +#include <config.h> + +#include "datatypes.h" +#include "domain_audit.h" +#include "domain_conf.h" +#include "domain_driver.h" +#include "domain_interface.h" +#include "domain_nwfilter.h" +#include "network_conf.h" +#include "viralloc.h" +#include "virconftypes.h" +#include "virebtables.h" +#include "virfile.h" +#include "virlog.h" +#include "virmacaddr.h" +#include "virnetdevbridge.h" +#include "virnetdevmidonet.h" +#include "virnetdevopenvswitch.h" +#include "virnetdevtap.h" + + +#define VIR_FROM_THIS VIR_FROM_DOMAIN + +VIR_LOG_INIT("domain.interface"); + +bool +virDomainInterfaceIsVnetCompatModel(const virDomainNetDef *net) +{ + return (virDomainNetIsVirtioModel(net) || + net->model == VIR_DOMAIN_NET_MODEL_E1000E || + net->model == VIR_DOMAIN_NET_MODEL_IGB || + net->model == VIR_DOMAIN_NET_MODEL_VMXNET3); +} + +/* virDomainInterfaceEthernetConnect: + * @def: the definition of the VM + * @net: a net definition in the VM + * @ebtables: ebtales context + * @macFilter: whether driver support mac Filtering + * @privileged: whether running as privileged user + * @tapfd: returned array of file descriptors for the domain interface + * @tapfdsize: returned number of file descriptors in @tapfd + * + * Called *only* called if actualType is VIR_DOMAIN_NET_TYPE_ETHERNET + * (i.e. if the connection is made with a tap device) + */ +int +virDomainInterfaceEthernetConnect(virDomainDef *def, + virDomainNetDef *net, + ebtablesContext *ebtables, + bool macFilter, + bool privileged, + int *tapfd, + size_t tapfdSize) +{ + virMacAddr tapmac; + int ret = -1; + unsigned int tap_create_flags = VIR_NETDEV_TAP_CREATE_IFUP; + bool template_ifname = false; + const char *tunpath = "/dev/net/tun"; + const char *auditdev = tunpath; + + if (net->backend.tap) { + tunpath = net->backend.tap; + if (!privileged) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", + _("cannot use custom tap device in session mode")); + goto cleanup; + } + } + + if (virDomainInterfaceIsVnetCompatModel(net)) + tap_create_flags |= VIR_NETDEV_TAP_CREATE_VNET_HDR; + + if (net->managed_tap == VIR_TRISTATE_BOOL_NO) { + if (!net->ifname) { + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("target dev must be supplied when managed='no'")); + goto cleanup; + } + if (virNetDevExists(net->ifname) != 1) { + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("target managed='no' but specified dev doesn't exist")); + goto cleanup; + } + + tap_create_flags |= VIR_NETDEV_TAP_CREATE_ALLOW_EXISTING; + + if (virNetDevMacVLanIsMacvtap(net->ifname)) { + auditdev = net->ifname; + if (virNetDevMacVLanTapOpen(net->ifname, tapfd, tapfdSize) < 0) + goto cleanup; + if (virNetDevMacVLanTapSetup(tapfd, tapfdSize, + virDomainInterfaceIsVnetCompatModel(net)) < 0) { + goto cleanup; + } + } else { + if (virNetDevTapCreate(&net->ifname, tunpath, tapfd, tapfdSize, + tap_create_flags) < 0) + goto cleanup; + } + } else { + + if (!net->ifname) + template_ifname = true; + + if (virNetDevTapCreate(&net->ifname, tunpath, tapfd, tapfdSize, + tap_create_flags) < 0) { + goto cleanup; + } + + /* The tap device's MAC address cannot match the MAC address + * used by the guest. This results in "received packet on + * vnetX with own address as source address" error logs from + * the kernel. + */ + virMacAddrSet(&tapmac, &net->mac); + if (tapmac.addr[0] == 0xFE) + tapmac.addr[0] = 0xFA; + else + tapmac.addr[0] = 0xFE; + + if (virNetDevSetMAC(net->ifname, &tapmac) < 0) + goto cleanup; + + if (virNetDevSetOnline(net->ifname, true) < 0) + goto cleanup; + } + + if (net->script && + virNetDevRunEthernetScript(net->ifname, net->script) < 0) + goto cleanup; + + if (macFilter && + ebtablesAddForwardAllowIn(ebtables, + net->ifname, + &net->mac) < 0) + goto cleanup; + + if (net->filter && + virDomainConfNWFilterInstantiate(def->name, def->uuid, net, false) < 0) { + goto cleanup; + } + + virDomainAuditNetDevice(def, net, auditdev, true); + + ret = 0; + + cleanup: + if (ret < 0) { + size_t i; + + virDomainAuditNetDevice(def, net, auditdev, false); + for (i = 0; i < tapfdSize && tapfd[i] >= 0; i++) + VIR_FORCE_CLOSE(tapfd[i]); + if (template_ifname) + VIR_FREE(net->ifname); + } + + return ret; +} + +/** + * virDomainInterfaceStartDevice: + * @net: net device to start + * + * Based upon the type of device provided, perform the appropriate + * work to completely activate the device and make it reachable from + * the rest of the network. + */ +int +virDomainInterfaceStartDevice(virDomainNetDef *net) +{ + virDomainNetType actualType = virDomainNetGetActualType(net); + + switch (actualType) { + case VIR_DOMAIN_NET_TYPE_BRIDGE: + case VIR_DOMAIN_NET_TYPE_NETWORK: + if (virDomainNetGetActualBridgeMACTableManager(net) == + VIR_NETWORK_BRIDGE_MAC_TABLE_MANAGER_LIBVIRT) { + /* libvirt is managing the FDB of the bridge this device + * is attaching to, so we have turned off learning and + * unicast_flood on the device to prevent the kernel from + * adding any FDB entries for it. This means we need to + * add an fdb entry ourselves, using the MAC address from + * the interface config. + */ + if (virNetDevBridgeFDBAdd(&net->mac, net->ifname, + VIR_NETDEVBRIDGE_FDB_FLAG_MASTER | + VIR_NETDEVBRIDGE_FDB_FLAG_TEMP) < 0) + return -1; + } + break; + + case VIR_DOMAIN_NET_TYPE_DIRECT: { + const char *physdev = virDomainNetGetActualDirectDev(net); + bool isOnline = true; + + /* set the physdev online if necessary. It may already be up, + * in which case we shouldn't re-up it just in case that causes + * some sort of "blip" in the physdev's status. + */ + if (physdev && virNetDevGetOnline(physdev, &isOnline) < 0) + return -1; + if (!isOnline && virNetDevSetOnline(physdev, true) < 0) + return -1; + + /* macvtap devices share their MAC address with the guest + * domain, and if they are set online prior to the domain CPUs + * being started, the host may send out traffic from this + * device that could confuse other entities on the network (in + * particular, if this new domain is the destination of a + * migration, and the source domain is still running, another + * host may mistakenly direct traffic for the guest to the + * destination domain rather than source domain). To prevent + * this, we create the macvtap device with IFF_UP false + * (i.e. "offline") then wait to bring it online until just as + * we are starting the domain CPUs. + */ + if (virNetDevSetOnline(net->ifname, true) < 0) + return -1; + break; + } + + case VIR_DOMAIN_NET_TYPE_ETHERNET: + if (virNetDevIPInfoAddToDev(net->ifname, &net->hostIP) < 0) + return -1; + + break; + + case VIR_DOMAIN_NET_TYPE_USER: + case VIR_DOMAIN_NET_TYPE_VHOSTUSER: + case VIR_DOMAIN_NET_TYPE_SERVER: + case VIR_DOMAIN_NET_TYPE_CLIENT: + case VIR_DOMAIN_NET_TYPE_MCAST: + case VIR_DOMAIN_NET_TYPE_UDP: + case VIR_DOMAIN_NET_TYPE_INTERNAL: + case VIR_DOMAIN_NET_TYPE_HOSTDEV: + case VIR_DOMAIN_NET_TYPE_VDPA: + case VIR_DOMAIN_NET_TYPE_NULL: + case VIR_DOMAIN_NET_TYPE_VDS: + case VIR_DOMAIN_NET_TYPE_LAST: + /* these types all require no action */ + break; + } + + return 0; +} + +/** + * virDomainInterfaceStartDevices: + * @def: domain definition + * + * Set all ifaces associated with this domain to the online state. + */ +int +virDomainInterfaceStartDevices(virDomainDef *def) +{ + size_t i; + + for (i = 0; i < def->nnets; i++) { + if (virDomainInterfaceStartDevice(def->nets[i]) < 0) + return -1; + } + return 0; +} + +/** + * virDomainInterfaceStopDevice: + * @net: net device to stop + * + * Based upon the type of device provided, perform the appropriate + * work to deactivate the device so that packets aren't forwarded to + * it from the rest of the network. + */ +int +virDomainInterfaceStopDevice(virDomainNetDef *net) +{ + virDomainNetType actualType = virDomainNetGetActualType(net); + + switch (actualType) { + case VIR_DOMAIN_NET_TYPE_BRIDGE: + case VIR_DOMAIN_NET_TYPE_NETWORK: + if (virDomainNetGetActualBridgeMACTableManager(net) == + VIR_NETWORK_BRIDGE_MAC_TABLE_MANAGER_LIBVIRT) { + /* remove the FDB entries that were added during + * virDomainInterfaceStartDevices() + */ + if (virNetDevBridgeFDBDel(&net->mac, net->ifname, + VIR_NETDEVBRIDGE_FDB_FLAG_MASTER | + VIR_NETDEVBRIDGE_FDB_FLAG_TEMP) < 0) + return -1; + } + break; + + case VIR_DOMAIN_NET_TYPE_DIRECT: { + const char *physdev = virDomainNetGetActualDirectDev(net); + + /* macvtap interfaces need to be marked !IFF_UP (ie "down") to + * prevent any host-generated traffic sent from this interface + * from putting bad info into the arp caches of other machines + * on this network. + */ + if (virNetDevSetOnline(net->ifname, false) < 0) + return -1; + + /* also mark the physdev down for passthrough macvtap, as the + * physdev has the same MAC address as the macvtap device. + */ + if (virDomainNetGetActualDirectMode(net) == + VIR_NETDEV_MACVLAN_MODE_PASSTHRU && + physdev && virNetDevSetOnline(physdev, false) < 0) + return -1; + break; + } + + case VIR_DOMAIN_NET_TYPE_ETHERNET: + case VIR_DOMAIN_NET_TYPE_USER: + case VIR_DOMAIN_NET_TYPE_VHOSTUSER: + case VIR_DOMAIN_NET_TYPE_SERVER: + case VIR_DOMAIN_NET_TYPE_CLIENT: + case VIR_DOMAIN_NET_TYPE_MCAST: + case VIR_DOMAIN_NET_TYPE_UDP: + case VIR_DOMAIN_NET_TYPE_INTERNAL: + case VIR_DOMAIN_NET_TYPE_HOSTDEV: + case VIR_DOMAIN_NET_TYPE_VDPA: + case VIR_DOMAIN_NET_TYPE_NULL: + case VIR_DOMAIN_NET_TYPE_VDS: + case VIR_DOMAIN_NET_TYPE_LAST: + /* these types all require no action */ + break; + } + + return 0; +} + +/** + * virDomainInterfaceStopDevices: + * @def: domain definition + * + * Make all interfaces associated with this domain inaccessible from + * the rest of the network. + */ +int +virDomainInterfaceStopDevices(virDomainDef *def) +{ + size_t i; + + for (i = 0; i < def->nnets; i++) { + if (virDomainInterfaceStopDevice(def->nets[i]) < 0) + return -1; + } + return 0; +} + +/** + * virDomainInterfaceDeleteDevice: + * @def: domain definition + * @net: a net definition in the VM + * @priv_net_created: whether private network created + * @stateDir: driver's stateDir + * + * Disconnect and delete domain interfaces. + */ + +void +virDomainInterfaceDeleteDevice(virDomainDef *def, + virDomainNetDef *net, + bool priv_net_created, + char *stateDir) +{ + const virNetDevVPortProfile *vport = NULL; + g_autoptr(virConnect) conn = NULL; + + vport = virDomainNetGetActualVirtPortProfile(net); + switch (virDomainNetGetActualType(net)) { + case VIR_DOMAIN_NET_TYPE_DIRECT: + if (priv_net_created) { + virNetDevMacVLanDeleteWithVPortProfile(net->ifname, &net->mac, + virDomainNetGetActualDirectDev(net), + virDomainNetGetActualDirectMode(net), + virDomainNetGetActualVirtPortProfile(net), + stateDir); + } + break; + case VIR_DOMAIN_NET_TYPE_ETHERNET: + if (net->managed_tap != VIR_TRISTATE_BOOL_NO && net->ifname) { + ignore_value(virNetDevTapDelete(net->ifname, net->backend.tap)); + VIR_FREE(net->ifname); + } + break; + case VIR_DOMAIN_NET_TYPE_BRIDGE: + case VIR_DOMAIN_NET_TYPE_NETWORK: +#ifdef VIR_NETDEV_TAP_REQUIRE_MANUAL_CLEANUP + if (!(vport && vport->virtPortType == VIR_NETDEV_VPORT_PROFILE_OPENVSWITCH)) + ignore_value(virNetDevTapDelete(net->ifname, net->backend.tap)); +#endif + break; + case VIR_DOMAIN_NET_TYPE_USER: + case VIR_DOMAIN_NET_TYPE_VHOSTUSER: + case VIR_DOMAIN_NET_TYPE_SERVER: + case VIR_DOMAIN_NET_TYPE_CLIENT: + case VIR_DOMAIN_NET_TYPE_MCAST: + case VIR_DOMAIN_NET_TYPE_INTERNAL: + case VIR_DOMAIN_NET_TYPE_HOSTDEV: + case VIR_DOMAIN_NET_TYPE_UDP: + case VIR_DOMAIN_NET_TYPE_VDPA: + case VIR_DOMAIN_NET_TYPE_NULL: + case VIR_DOMAIN_NET_TYPE_VDS: + case VIR_DOMAIN_NET_TYPE_LAST: + /* No special cleanup procedure for these types. */ + break; + } + /* release the physical device (or any other resources used by + * this interface in the network driver + */ + if (vport) { + if (vport->virtPortType == VIR_NETDEV_VPORT_PROFILE_MIDONET) { + ignore_value(virNetDevMidonetUnbindPort(vport)); + } else if (vport->virtPortType == VIR_NETDEV_VPORT_PROFILE_OPENVSWITCH) { + ignore_value(virNetDevOpenvswitchRemovePort( + virDomainNetGetActualBridgeName(net), + net->ifname)); + } + } + + /* kick the device out of the hostdev list too */ + virDomainNetRemoveHostdev(def, net); + if (net->type == VIR_DOMAIN_NET_TYPE_NETWORK) { + if (conn || (conn = virGetConnectNetwork())) + virDomainNetReleaseActualDevice(conn, net); + else + VIR_WARN("Unable to release network device '%s'", NULLSTR(net->ifname)); + } + +} diff --git a/src/hypervisor/domain_interface.h b/src/hypervisor/domain_interface.h new file mode 100644 index 0000000000..3d15e000cc --- /dev/null +++ b/src/hypervisor/domain_interface.h @@ -0,0 +1,45 @@ +/* + * Copyright (C) 2015-2016 Red Hat, Inc. + * Copyright IBM Corp. 2014 + * + * domain_interface.h: methods to manage guest/domain interfaces + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library. If not, see + * <http://www.gnu.org/licenses/>. + */ + +#pragma once + +#include "virebtables.h" + +int +virDomainInterfaceEthernetConnect(virDomainDef *def, + virDomainNetDef *net, + ebtablesContext *ebtables, + bool macFilter, + bool privileged, + int *tapfd, + size_t tapfdSize); + +bool +virDomainInterfaceIsVnetCompatModel(const virDomainNetDef *net); + +int virDomainInterfaceStartDevice(virDomainNetDef *net); +int virDomainInterfaceStartDevices(virDomainDef *def); +int virDomainInterfaceStopDevice(virDomainNetDef *net); +int virDomainInterfaceStopDevices(virDomainDef *def); +void virDomainInterfaceDeleteDevice(virDomainDef *def, + virDomainNetDef *net, + bool priv_net_created, + char *stateDir); diff --git a/src/hypervisor/meson.build b/src/hypervisor/meson.build index f35565b16b..819a9a82a2 100644 --- a/src/hypervisor/meson.build +++ b/src/hypervisor/meson.build @@ -1,6 +1,7 @@ hypervisor_sources = [ 'domain_cgroup.c', 'domain_driver.c', + 'domain_interface.c', 'virclosecallbacks.c', 'virhostdev.c', ] diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index fc26109029..80d43ed15d 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -1631,6 +1631,15 @@ virDomainDriverNodeDeviceReset; virDomainDriverParseBlkioDeviceStr; virDomainDriverSetupPersistentDefBlkioParams; +# hypervisor/domain_interface.h +virDomainInterfaceDeleteDevice; +virDomainInterfaceEthernetConnect; +virDomainInterfaceIsVnetCompatModel; +virDomainInterfaceStartDevice; +virDomainInterfaceStartDevices; +virDomainInterfaceStopDevice; +virDomainInterfaceStopDevices; + # hypervisor/virclosecallbacks.h virCloseCallbacksDomainAdd; diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c index 712feb7b81..9289ba5595 100644 --- a/src/qemu/qemu_command.c +++ b/src/qemu/qemu_command.c @@ -43,6 +43,7 @@ #include "domain_nwfilter.h" #include "domain_addr.h" #include "domain_conf.h" +#include "domain_interface.h" #include "netdev_bandwidth_conf.h" #include "virnetdevopenvswitch.h" #include "device_conf.h" @@ -8624,8 +8625,11 @@ qemuBuildInterfaceConnect(virDomainObj *vm, break; case VIR_DOMAIN_NET_TYPE_ETHERNET: - if (qemuInterfaceEthernetConnect(vm->def, priv->driver, net, - tapfd, tapfdSize) < 0) + if (virDomainInterfaceEthernetConnect(vm->def, net, + priv->driver->ebtables, + priv->driver->config->macFilter, + priv->driver->privileged, + tapfd, tapfdSize) < 0) return -1; vhostfd = true; break; diff --git a/src/qemu/qemu_hotplug.c b/src/qemu/qemu_hotplug.c index bf1ac241ff..4341ad18b5 100644 --- a/src/qemu/qemu_hotplug.c +++ b/src/qemu/qemu_hotplug.c @@ -39,6 +39,7 @@ #include "qemu_virtiofs.h" #include "domain_audit.h" #include "domain_cgroup.h" +#include "domain_interface.h" #include "netdev_bandwidth_conf.h" #include "domain_nwfilter.h" #include "virlog.h" @@ -1278,7 +1279,7 @@ qemuDomainAttachNetDevice(virQEMUDriver *driver, } /* Set device online immediately */ - if (qemuInterfaceStartDevice(net) < 0) + if (virDomainInterfaceStartDevice(net) < 0) goto cleanup; qemuDomainInterfaceSetDefaultQDisc(driver, net); @@ -4829,7 +4830,7 @@ qemuDomainRemoveNetDevice(virQEMUDriver *driver, * affect the parent device (e.g. macvtap passthrough mode sets * the parent device offline) */ - ignore_value(qemuInterfaceStopDevice(net)); + ignore_value(virDomainInterfaceStopDevice(net)); qemuDomainObjEnterMonitor(vm); if (qemuMonitorRemoveNetdev(priv->mon, hostnet_name) < 0) { diff --git a/src/qemu/qemu_interface.c b/src/qemu/qemu_interface.c index 8856bb95a8..c2007c7043 100644 --- a/src/qemu/qemu_interface.c +++ b/src/qemu/qemu_interface.c @@ -24,6 +24,7 @@ #include "network_conf.h" #include "domain_audit.h" #include "domain_nwfilter.h" +#include "domain_interface.h" #include "qemu_interface.h" #include "viralloc.h" #include "virlog.h" @@ -41,211 +42,6 @@ VIR_LOG_INIT("qemu.qemu_interface"); -/** - * qemuInterfaceStartDevice: - * @net: net device to start - * - * Based upon the type of device provided, perform the appropriate - * work to completely activate the device and make it reachable from - * the rest of the network. - */ -int -qemuInterfaceStartDevice(virDomainNetDef *net) -{ - virDomainNetType actualType = virDomainNetGetActualType(net); - - switch (actualType) { - case VIR_DOMAIN_NET_TYPE_BRIDGE: - case VIR_DOMAIN_NET_TYPE_NETWORK: - if (virDomainNetGetActualBridgeMACTableManager(net) - == VIR_NETWORK_BRIDGE_MAC_TABLE_MANAGER_LIBVIRT) { - /* libvirt is managing the FDB of the bridge this device - * is attaching to, so we have turned off learning and - * unicast_flood on the device to prevent the kernel from - * adding any FDB entries for it. This means we need to - * add an fdb entry ourselves, using the MAC address from - * the interface config. - */ - if (virNetDevBridgeFDBAdd(&net->mac, net->ifname, - VIR_NETDEVBRIDGE_FDB_FLAG_MASTER | - VIR_NETDEVBRIDGE_FDB_FLAG_TEMP) < 0) - return -1; - } - break; - - case VIR_DOMAIN_NET_TYPE_DIRECT: { - const char *physdev = virDomainNetGetActualDirectDev(net); - bool isOnline = true; - - /* set the physdev online if necessary. It may already be up, - * in which case we shouldn't re-up it just in case that causes - * some sort of "blip" in the physdev's status. - */ - if (physdev && virNetDevGetOnline(physdev, &isOnline) < 0) - return -1; - if (!isOnline && virNetDevSetOnline(physdev, true) < 0) - return -1; - - /* macvtap devices share their MAC address with the guest - * domain, and if they are set online prior to the domain CPUs - * being started, the host may send out traffic from this - * device that could confuse other entities on the network (in - * particular, if this new domain is the destination of a - * migration, and the source domain is still running, another - * host may mistakenly direct traffic for the guest to the - * destination domain rather than source domain). To prevent - * this, we create the macvtap device with IFF_UP false - * (i.e. "offline") then wait to bring it online until just as - * we are starting the domain CPUs. - */ - if (virNetDevSetOnline(net->ifname, true) < 0) - return -1; - break; - } - - case VIR_DOMAIN_NET_TYPE_ETHERNET: - if (virNetDevIPInfoAddToDev(net->ifname, &net->hostIP) < 0) - return -1; - - break; - - case VIR_DOMAIN_NET_TYPE_USER: - case VIR_DOMAIN_NET_TYPE_VHOSTUSER: - case VIR_DOMAIN_NET_TYPE_SERVER: - case VIR_DOMAIN_NET_TYPE_CLIENT: - case VIR_DOMAIN_NET_TYPE_MCAST: - case VIR_DOMAIN_NET_TYPE_UDP: - case VIR_DOMAIN_NET_TYPE_INTERNAL: - case VIR_DOMAIN_NET_TYPE_HOSTDEV: - case VIR_DOMAIN_NET_TYPE_VDPA: - case VIR_DOMAIN_NET_TYPE_NULL: - case VIR_DOMAIN_NET_TYPE_VDS: - case VIR_DOMAIN_NET_TYPE_LAST: - /* these types all require no action */ - break; - } - - return 0; -} - -/** - * qemuInterfaceStartDevices: - * @def: domain definition - * - * Set all ifaces associated with this domain to the online state. - */ -int -qemuInterfaceStartDevices(virDomainDef *def) -{ - size_t i; - - for (i = 0; i < def->nnets; i++) { - if (qemuInterfaceStartDevice(def->nets[i]) < 0) - return -1; - } - return 0; -} - - -/** - * qemuInterfaceStopDevice: - * @net: net device to stop - * - * Based upon the type of device provided, perform the appropriate - * work to deactivate the device so that packets aren't forwarded to - * it from the rest of the network. - */ -int -qemuInterfaceStopDevice(virDomainNetDef *net) -{ - virDomainNetType actualType = virDomainNetGetActualType(net); - - switch (actualType) { - case VIR_DOMAIN_NET_TYPE_BRIDGE: - case VIR_DOMAIN_NET_TYPE_NETWORK: - if (virDomainNetGetActualBridgeMACTableManager(net) - == VIR_NETWORK_BRIDGE_MAC_TABLE_MANAGER_LIBVIRT) { - /* remove the FDB entries that were added during - * qemuInterfaceStartDevices() - */ - if (virNetDevBridgeFDBDel(&net->mac, net->ifname, - VIR_NETDEVBRIDGE_FDB_FLAG_MASTER | - VIR_NETDEVBRIDGE_FDB_FLAG_TEMP) < 0) - return -1; - } - break; - - case VIR_DOMAIN_NET_TYPE_DIRECT: { - const char *physdev = virDomainNetGetActualDirectDev(net); - - /* macvtap interfaces need to be marked !IFF_UP (ie "down") to - * prevent any host-generated traffic sent from this interface - * from putting bad info into the arp caches of other machines - * on this network. - */ - if (virNetDevSetOnline(net->ifname, false) < 0) - return -1; - - /* also mark the physdev down for passthrough macvtap, as the - * physdev has the same MAC address as the macvtap device. - */ - if (virDomainNetGetActualDirectMode(net) == - VIR_NETDEV_MACVLAN_MODE_PASSTHRU && - physdev && virNetDevSetOnline(physdev, false) < 0) - return -1; - break; - } - - case VIR_DOMAIN_NET_TYPE_ETHERNET: - case VIR_DOMAIN_NET_TYPE_USER: - case VIR_DOMAIN_NET_TYPE_VHOSTUSER: - case VIR_DOMAIN_NET_TYPE_SERVER: - case VIR_DOMAIN_NET_TYPE_CLIENT: - case VIR_DOMAIN_NET_TYPE_MCAST: - case VIR_DOMAIN_NET_TYPE_UDP: - case VIR_DOMAIN_NET_TYPE_INTERNAL: - case VIR_DOMAIN_NET_TYPE_HOSTDEV: - case VIR_DOMAIN_NET_TYPE_VDPA: - case VIR_DOMAIN_NET_TYPE_NULL: - case VIR_DOMAIN_NET_TYPE_VDS: - case VIR_DOMAIN_NET_TYPE_LAST: - /* these types all require no action */ - break; - } - - return 0; -} - -/** - * qemuInterfaceStopDevices: - * @def: domain definition - * - * Make all interfaces associated with this domain inaccessible from - * the rest of the network. - */ -int -qemuInterfaceStopDevices(virDomainDef *def) -{ - size_t i; - - for (i = 0; i < def->nnets; i++) { - if (qemuInterfaceStopDevice(def->nets[i]) < 0) - return -1; - } - return 0; -} - - -static bool -qemuInterfaceIsVnetCompatModel(const virDomainNetDef *net) -{ - return (virDomainNetIsVirtioModel(net) || - net->model == VIR_DOMAIN_NET_MODEL_E1000E || - net->model == VIR_DOMAIN_NET_MODEL_IGB || - net->model == VIR_DOMAIN_NET_MODEL_VMXNET3); -} - - /** * qemuInterfaceDirectConnect: * @def: the definition of the VM (needed by 802.1Qbh and audit) @@ -271,7 +67,7 @@ qemuInterfaceDirectConnect(virDomainDef *def, unsigned int macvlan_create_flags = VIR_NETDEV_MACVLAN_CREATE_WITH_TAP; qemuDomainNetworkPrivate *netpriv = QEMU_DOMAIN_NETWORK_PRIVATE(net); - if (qemuInterfaceIsVnetCompatModel(net)) + if (virDomainInterfaceIsVnetCompatModel(net)) macvlan_create_flags |= VIR_NETDEV_MACVLAN_VNET_HDR; if (virNetDevMacVLanCreateWithVPortProfile(net->ifname, @@ -409,133 +205,6 @@ qemuCreateInBridgePortWithHelper(virQEMUDriverConfig *cfg, return *tapfd < 0 ? -1 : 0; } - -/* qemuInterfaceEthernetConnect: - * @def: the definition of the VM - * @driver: qemu driver data - * @net: pointer to the VM's interface description - * @tapfd: array of file descriptor return value for the new device - * @tapfdsize: number of file descriptors in @tapfd - * - * Called *only* called if actualType is VIR_DOMAIN_NET_TYPE_ETHERNET - * (i.e. if the connection is made with a tap device) - */ -int -qemuInterfaceEthernetConnect(virDomainDef *def, - virQEMUDriver *driver, - virDomainNetDef *net, - int *tapfd, - size_t tapfdSize) -{ - virMacAddr tapmac; - int ret = -1; - unsigned int tap_create_flags = VIR_NETDEV_TAP_CREATE_IFUP; - bool template_ifname = false; - g_autoptr(virQEMUDriverConfig) cfg = virQEMUDriverGetConfig(driver); - const char *tunpath = "/dev/net/tun"; - const char *auditdev = tunpath; - - if (net->backend.tap) { - tunpath = net->backend.tap; - if (!driver->privileged) { - virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", - _("cannot use custom tap device in session mode")); - goto cleanup; - } - } - - if (qemuInterfaceIsVnetCompatModel(net)) - tap_create_flags |= VIR_NETDEV_TAP_CREATE_VNET_HDR; - - if (net->managed_tap == VIR_TRISTATE_BOOL_NO) { - if (!net->ifname) { - virReportError(VIR_ERR_INTERNAL_ERROR, "%s", - _("target dev must be supplied when managed='no'")); - goto cleanup; - } - if (virNetDevExists(net->ifname) != 1) { - virReportError(VIR_ERR_INTERNAL_ERROR, "%s", - _("target managed='no' but specified dev doesn't exist")); - goto cleanup; - } - - tap_create_flags |= VIR_NETDEV_TAP_CREATE_ALLOW_EXISTING; - - if (virNetDevMacVLanIsMacvtap(net->ifname)) { - auditdev = net->ifname; - if (virNetDevMacVLanTapOpen(net->ifname, tapfd, tapfdSize) < 0) - goto cleanup; - if (virNetDevMacVLanTapSetup(tapfd, tapfdSize, - qemuInterfaceIsVnetCompatModel(net)) < 0) { - goto cleanup; - } - } else { - if (virNetDevTapCreate(&net->ifname, tunpath, tapfd, tapfdSize, - tap_create_flags) < 0) - goto cleanup; - } - } else { - - if (!net->ifname) - template_ifname = true; - - if (virNetDevTapCreate(&net->ifname, tunpath, tapfd, tapfdSize, - tap_create_flags) < 0) { - goto cleanup; - } - - /* The tap device's MAC address cannot match the MAC address - * used by the guest. This results in "received packet on - * vnetX with own address as source address" error logs from - * the kernel. - */ - virMacAddrSet(&tapmac, &net->mac); - if (tapmac.addr[0] == 0xFE) - tapmac.addr[0] = 0xFA; - else - tapmac.addr[0] = 0xFE; - - if (virNetDevSetMAC(net->ifname, &tapmac) < 0) - goto cleanup; - - if (virNetDevSetOnline(net->ifname, true) < 0) - goto cleanup; - } - - if (net->script && - virNetDevRunEthernetScript(net->ifname, net->script) < 0) - goto cleanup; - - if (cfg->macFilter && - ebtablesAddForwardAllowIn(driver->ebtables, - net->ifname, - &net->mac) < 0) - goto cleanup; - - if (net->filter && - virDomainConfNWFilterInstantiate(def->name, def->uuid, net, false) < 0) { - goto cleanup; - } - - virDomainAuditNetDevice(def, net, auditdev, true); - - ret = 0; - - cleanup: - if (ret < 0) { - size_t i; - - virDomainAuditNetDevice(def, net, auditdev, false); - for (i = 0; i < tapfdSize && tapfd[i] >= 0; i++) - VIR_FORCE_CLOSE(tapfd[i]); - if (template_ifname) - VIR_FREE(net->ifname); - } - - return ret; -} - - /* qemuInterfaceBridgeConnect: * @def: the definition of the VM * @driver: qemu driver data @@ -578,7 +247,7 @@ qemuInterfaceBridgeConnect(virDomainDef *def, if (!net->ifname) template_ifname = true; - if (qemuInterfaceIsVnetCompatModel(net)) + if (virDomainInterfaceIsVnetCompatModel(net)) tap_create_flags |= VIR_NETDEV_TAP_CREATE_VNET_HDR; if (driver->privileged) { @@ -598,7 +267,7 @@ qemuInterfaceBridgeConnect(virDomainDef *def, * is attaching to, so we need to turn off learning and * unicast_flood on the device to prevent the kernel from * adding any FDB entries for it. We will add an fdb - * entry ourselves (during qemuInterfaceStartDevices(), + * entry ourselves (during virDomainInterfaceStartDevices(), * using the MAC address from the interface config. */ if (virNetDevBridgePortSetLearning(brname, net->ifname, false) < 0) diff --git a/src/qemu/qemu_interface.h b/src/qemu/qemu_interface.h index 67cbada36e..aee5f9efb0 100644 --- a/src/qemu/qemu_interface.h +++ b/src/qemu/qemu_interface.h @@ -25,11 +25,6 @@ #include "qemu_domain.h" #include "qemu_slirp.h" -int qemuInterfaceStartDevice(virDomainNetDef *net); -int qemuInterfaceStartDevices(virDomainDef *def); -int qemuInterfaceStopDevice(virDomainNetDef *net); -int qemuInterfaceStopDevices(virDomainDef *def); - int qemuInterfaceDirectConnect(virDomainDef *def, virQEMUDriver *driver, virDomainNetDef *net, @@ -37,12 +32,6 @@ int qemuInterfaceDirectConnect(virDomainDef *def, size_t tapfdSize, virNetDevVPortProfileOp vmop); -int qemuInterfaceEthernetConnect(virDomainDef *def, - virQEMUDriver *driver, - virDomainNetDef *net, - int *tapfd, - size_t tapfdSize); - int qemuInterfaceBridgeConnect(virDomainDef *def, virQEMUDriver *driver, virDomainNetDef *net, diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c index 3abd3ad23a..a8e3c4b004 100644 --- a/src/qemu/qemu_process.c +++ b/src/qemu/qemu_process.c @@ -74,6 +74,7 @@ #include "virhostcpu.h" #include "domain_audit.h" #include "domain_cgroup.h" +#include "domain_interface.h" #include "domain_nwfilter.h" #include "domain_postparse.h" #include "domain_validate.h" @@ -3120,7 +3121,7 @@ qemuProcessStartCPUs(virQEMUDriver *driver, virDomainObj *vm, g_autoptr(virQEMUDriverConfig) cfg = virQEMUDriverGetConfig(driver); /* Bring up netdevs before starting CPUs */ - if (qemuInterfaceStartDevices(vm->def) < 0) + if (virDomainInterfaceStartDevices(vm->def) < 0) return -1; VIR_DEBUG("Using lock state '%s'", NULLSTR(priv->lockState)); @@ -3183,7 +3184,7 @@ int qemuProcessStopCPUs(virQEMUDriver *driver, goto cleanup; /* de-activate netdevs after stopping CPUs */ - ignore_value(qemuInterfaceStopDevices(vm->def)); + ignore_value(virDomainInterfaceStopDevices(vm->def)); if (vm->job->current) ignore_value(virTimeMillisNow(&vm->job->current->stopped)); @@ -8377,11 +8378,9 @@ void qemuProcessStop(virQEMUDriver *driver, qemuDomainObjPrivate *priv = vm->privateData; virErrorPtr orig_err; virDomainDef *def = vm->def; - const virNetDevVPortProfile *vport = NULL; size_t i; g_autofree char *timestamp = NULL; g_autoptr(virQEMUDriverConfig) cfg = virQEMUDriverGetConfig(driver); - g_autoptr(virConnect) conn = NULL; bool outgoingMigration; VIR_DEBUG("Shutting down vm=%p name=%s id=%d pid=%lld, " @@ -8520,69 +8519,12 @@ void qemuProcessStop(virQEMUDriver *driver, } qemuHostdevReAttachDomainDevices(driver, vm->def); - for (i = 0; i < def->nnets; i++) { virDomainNetDef *net = def->nets[i]; - vport = virDomainNetGetActualVirtPortProfile(net); - switch (virDomainNetGetActualType(net)) { - case VIR_DOMAIN_NET_TYPE_DIRECT: - if (QEMU_DOMAIN_NETWORK_PRIVATE(net)->created) { - virNetDevMacVLanDeleteWithVPortProfile(net->ifname, &net->mac, - virDomainNetGetActualDirectDev(net), - virDomainNetGetActualDirectMode(net), - virDomainNetGetActualVirtPortProfile(net), - cfg->stateDir); - } - break; - case VIR_DOMAIN_NET_TYPE_ETHERNET: - if (net->managed_tap != VIR_TRISTATE_BOOL_NO && net->ifname) { - ignore_value(virNetDevTapDelete(net->ifname, net->backend.tap)); - VIR_FREE(net->ifname); - } - break; - case VIR_DOMAIN_NET_TYPE_BRIDGE: - case VIR_DOMAIN_NET_TYPE_NETWORK: -#ifdef VIR_NETDEV_TAP_REQUIRE_MANUAL_CLEANUP - if (!(vport && vport->virtPortType == VIR_NETDEV_VPORT_PROFILE_OPENVSWITCH)) - ignore_value(virNetDevTapDelete(net->ifname, net->backend.tap)); -#endif - break; - case VIR_DOMAIN_NET_TYPE_USER: - case VIR_DOMAIN_NET_TYPE_VHOSTUSER: - case VIR_DOMAIN_NET_TYPE_SERVER: - case VIR_DOMAIN_NET_TYPE_CLIENT: - case VIR_DOMAIN_NET_TYPE_MCAST: - case VIR_DOMAIN_NET_TYPE_INTERNAL: - case VIR_DOMAIN_NET_TYPE_HOSTDEV: - case VIR_DOMAIN_NET_TYPE_UDP: - case VIR_DOMAIN_NET_TYPE_VDPA: - case VIR_DOMAIN_NET_TYPE_NULL: - case VIR_DOMAIN_NET_TYPE_VDS: - case VIR_DOMAIN_NET_TYPE_LAST: - /* No special cleanup procedure for these types. */ - break; - } - /* release the physical device (or any other resources used by - * this interface in the network driver - */ - if (vport) { - if (vport->virtPortType == VIR_NETDEV_VPORT_PROFILE_MIDONET) { - ignore_value(virNetDevMidonetUnbindPort(vport)); - } else if (vport->virtPortType == VIR_NETDEV_VPORT_PROFILE_OPENVSWITCH) { - ignore_value(virNetDevOpenvswitchRemovePort( - virDomainNetGetActualBridgeName(net), - net->ifname)); - } - } - - /* kick the device out of the hostdev list too */ - virDomainNetRemoveHostdev(def, net); - if (net->type == VIR_DOMAIN_NET_TYPE_NETWORK) { - if (conn || (conn = virGetConnectNetwork())) - virDomainNetReleaseActualDevice(conn, net); - else - VIR_WARN("Unable to release network device '%s'", NULLSTR(net->ifname)); - } + virDomainInterfaceDeleteDevice(def, + net, + QEMU_DOMAIN_NETWORK_PRIVATE(net)->created, + cfg->stateDir); } retry: -- 2.43.0

virSocketSendMsgWithFDs method send fds along with payload using SCM_RIGHTS. virSocketRecv method polls, receives and sends the response to callers. These methods are required to add network suppport in ch driver. Signed-off-by: Praveen K Paladugu <prapal@linux.microsoft.com> --- po/POTFILES | 1 + src/libvirt_private.syms | 2 + src/util/virsocket.c | 116 +++++++++++++++++++++++++++++++++++++++ src/util/virsocket.h | 3 + 4 files changed, 122 insertions(+) diff --git a/po/POTFILES b/po/POTFILES index 023c041f61..b594a8dd39 100644 --- a/po/POTFILES +++ b/po/POTFILES @@ -326,6 +326,7 @@ src/util/virscsi.c src/util/virscsihost.c src/util/virscsivhost.c src/util/virsecret.c +src/util/virsocket.c src/util/virsocketaddr.c src/util/virstoragefile.c src/util/virstring.c diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index 80d43ed15d..2c9b2635cd 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -3371,8 +3371,10 @@ virSecureEraseString; # util/virsocket.h +virSocketRecv; virSocketRecvFD; virSocketSendFD; +virSocketSendMsgWithFDs; # util/virsocketaddr.h diff --git a/src/util/virsocket.c b/src/util/virsocket.c index cd6f7ecd1b..8019fd1199 100644 --- a/src/util/virsocket.c +++ b/src/util/virsocket.c @@ -19,11 +19,18 @@ #include <config.h> +#include "virerror.h" #include "virsocket.h" #include "virutil.h" #include "virfile.h" +#include "virlog.h" #include <fcntl.h> +#include <poll.h> + +#define PKT_TIMEOUT_MS 500 /* ms */ + +#define VIR_FROM_THIS VIR_FROM_NONE #ifdef WIN32 @@ -482,6 +489,108 @@ virSocketRecvFD(int sock, int fdflags) return fd; } + +/** + * virSocketSendMsgWithFDs: + * @sock: socket to send payload and fds to + * @payload: payload to send + * @fds: array of fds to send + * @fds_len: len of fds array + + * Send @fds along with @payload to @sock using SCM_RIGHTS. + * Return number of bytes sent on success. + * On error, set errno and return -1. + */ +int +virSocketSendMsgWithFDs(int sock, const char *payload, int *fds, size_t fds_len) +{ + struct msghdr msg; + struct iovec iov[1]; /* Send a single payload, so set vector len to 1 */ + int ret; + char control[CMSG_SPACE(sizeof(int)*fds_len)]; + struct cmsghdr *cmsg; + + memset(&msg, 0, sizeof(msg)); + memset(control, 0, sizeof(control)); + + iov[0].iov_base = (void *) payload; + iov[0].iov_len = strlen(payload); + + msg.msg_iov = iov; + msg.msg_iovlen = 1; + + msg.msg_control = control; + msg.msg_controllen = sizeof(control); + + cmsg = CMSG_FIRSTHDR(&msg); + /* check to eliminate "potential null pointer dereference" errors during build */ + if (!cmsg) { + virReportSystemError(EFAULT, "%s", _("Couldn't fit control msg header in msg") ); + return -1; + } + + cmsg->cmsg_len = CMSG_LEN(sizeof(int) * fds_len); + cmsg->cmsg_level = SOL_SOCKET; + cmsg->cmsg_type = SCM_RIGHTS; + memcpy(CMSG_DATA(cmsg), fds, sizeof(int) * fds_len); + + do { + ret = sendmsg(sock, &msg, 0); + } while (ret < 0 && errno == EINTR); + + if (ret < 0) { + virReportSystemError(errno, "%s", _("sendmsg failed") ); + return -1; + } + + return ret; +} + +/** + * virSocketRecv: + * @sock: socket to poll and receive response on + * + * This function polls @sock for response + * Returns received response or NULL on error. + */ +char * +virSocketRecv(int sock) +{ + struct pollfd pfds[1]; + char *buf = NULL; + size_t buf_len = 1024; + int ret; + + buf = g_new0(char, buf_len); + + pfds[0].fd = sock; + pfds[0].events = POLLIN; + + do { + ret = poll(pfds, G_N_ELEMENTS(pfds), PKT_TIMEOUT_MS); + } while (ret < 0 && errno == EINTR); + + if (ret <= 0) { + if (ret < 0) { + virReportSystemError(errno, _("Poll on sock %1$d failed"), sock); + } else if (ret == 0) { + virReportSystemError(errno, _("Poll on sock %1$d timed out"), sock); + } + return NULL; + } + + do { + ret = recv(sock, buf, buf_len-1, 0); + } while (ret < 0 && errno == EINTR); + + if (ret < 0) { + virReportSystemError(errno, _("recv on sock %1$d failed"), sock); + return NULL; + } + + return g_steal_pointer(&buf); +} + #else /* WIN32 */ int virSocketSendFD(int sock G_GNUC_UNUSED, int fd G_GNUC_UNUSED) @@ -496,4 +605,11 @@ virSocketRecvFD(int sock G_GNUC_UNUSED, int fdflags G_GNUC_UNUSED) errno = ENOSYS; return -1; } + +int +virSocketSendMsgWithFDs(int sock, const char *payload, int *fds, size_t fds_len) +{ + errno = ENOSYS; + return -1; +} #endif /* WIN32 */ diff --git a/src/util/virsocket.h b/src/util/virsocket.h index 419da8b3ae..00fbf52603 100644 --- a/src/util/virsocket.h +++ b/src/util/virsocket.h @@ -22,6 +22,9 @@ int virSocketSendFD(int sock, int fd); int virSocketRecvFD(int sock, int fdflags); +int virSocketSendMsgWithFDs(int sock, const char *payload, int *fds, + size_t fd_len); +char * virSocketRecv(int sock); #ifdef WIN32 -- 2.43.0

On 1/16/24 22:25, Praveen K Paladugu wrote:
virSocketSendMsgWithFDs method send fds along with payload using SCM_RIGHTS. virSocketRecv method polls, receives and sends the response to callers.
These methods are required to add network suppport in ch driver.
Signed-off-by: Praveen K Paladugu <prapal@linux.microsoft.com> --- po/POTFILES | 1 + src/libvirt_private.syms | 2 + src/util/virsocket.c | 116 +++++++++++++++++++++++++++++++++++++++ src/util/virsocket.h | 3 + 4 files changed, 122 insertions(+)
diff --git a/po/POTFILES b/po/POTFILES index 023c041f61..b594a8dd39 100644 --- a/po/POTFILES +++ b/po/POTFILES @@ -326,6 +326,7 @@ src/util/virscsi.c src/util/virscsihost.c src/util/virscsivhost.c src/util/virsecret.c +src/util/virsocket.c src/util/virsocketaddr.c src/util/virstoragefile.c src/util/virstring.c diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index 80d43ed15d..2c9b2635cd 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -3371,8 +3371,10 @@ virSecureEraseString;
# util/virsocket.h +virSocketRecv; virSocketRecvFD; virSocketSendFD; +virSocketSendMsgWithFDs;
# util/virsocketaddr.h diff --git a/src/util/virsocket.c b/src/util/virsocket.c index cd6f7ecd1b..8019fd1199 100644 --- a/src/util/virsocket.c +++ b/src/util/virsocket.c @@ -19,11 +19,18 @@
#include <config.h>
+#include "virerror.h" #include "virsocket.h" #include "virutil.h" #include "virfile.h" +#include "virlog.h"
#include <fcntl.h> +#include <poll.h> + +#define PKT_TIMEOUT_MS 500 /* ms */ + +#define VIR_FROM_THIS VIR_FROM_NONE
#ifdef WIN32
@@ -482,6 +489,108 @@ virSocketRecvFD(int sock, int fdflags)
return fd; } + +/** + * virSocketSendMsgWithFDs: + * @sock: socket to send payload and fds to + * @payload: payload to send + * @fds: array of fds to send + * @fds_len: len of fds array + + * Send @fds along with @payload to @sock using SCM_RIGHTS. + * Return number of bytes sent on success. + * On error, set errno and return -1. + */ +int +virSocketSendMsgWithFDs(int sock, const char *payload, int *fds, size_t fds_len) +{ + struct msghdr msg;
This can be initialized to zero so that explicit memset() below can be dropped.
+ struct iovec iov[1]; /* Send a single payload, so set vector len to 1 */ + int ret; + char control[CMSG_SPACE(sizeof(int)*fds_len)];
Yeah, we don't really do VLAs in libvirt. I wish we did and we could even switch to C23 so that this could use an empty initializer and the other memset below could be dropped, but that's fight for another day. For now, this needs to be allocated on the heap. No biggie.
+ struct cmsghdr *cmsg; + + memset(&msg, 0, sizeof(msg)); + memset(control, 0, sizeof(control)); + + iov[0].iov_base = (void *) payload; + iov[0].iov_len = strlen(payload); + + msg.msg_iov = iov; + msg.msg_iovlen = 1; + + msg.msg_control = control; + msg.msg_controllen = sizeof(control); + + cmsg = CMSG_FIRSTHDR(&msg); + /* check to eliminate "potential null pointer dereference" errors during build */ + if (!cmsg) { + virReportSystemError(EFAULT, "%s", _("Couldn't fit control msg header in msg") );
Formatting.
+ return -1; + } + + cmsg->cmsg_len = CMSG_LEN(sizeof(int) * fds_len); + cmsg->cmsg_level = SOL_SOCKET; + cmsg->cmsg_type = SCM_RIGHTS; + memcpy(CMSG_DATA(cmsg), fds, sizeof(int) * fds_len); + + do { + ret = sendmsg(sock, &msg, 0); + } while (ret < 0 && errno == EINTR); + + if (ret < 0) { + virReportSystemError(errno, "%s", _("sendmsg failed") );
Here too.
+ return -1; + } + + return ret; +} + +/** + * virSocketRecv: + * @sock: socket to poll and receive response on + * + * This function polls @sock for response + * Returns received response or NULL on error. + */ +char * +virSocketRecv(int sock) +{ + struct pollfd pfds[1]; + char *buf = NULL; + size_t buf_len = 1024; + int ret; + + buf = g_new0(char, buf_len); + + pfds[0].fd = sock; + pfds[0].events = POLLIN; + + do { + ret = poll(pfds, G_N_ELEMENTS(pfds), PKT_TIMEOUT_MS); + } while (ret < 0 && errno == EINTR); + + if (ret <= 0) { + if (ret < 0) { + virReportSystemError(errno, _("Poll on sock %1$d failed"), sock); + } else if (ret == 0) { + virReportSystemError(errno, _("Poll on sock %1$d timed out"), sock); + } + return NULL; + } + + do { + ret = recv(sock, buf, buf_len-1, 0);
I still think it's better to check whether there is any data to be read, but hey - if there's any error waiting on the FD, then recv() will pick it up and ...
+ } while (ret < 0 && errno == EINTR); + + if (ret < 0) { + virReportSystemError(errno, _("recv on sock %1$d failed"), sock); + return NULL;
... it's going to be propagated here.
+ } + + return g_steal_pointer(&buf); +}
Now, this function is put into !WIN32 block and also exposed in private syms but lacks WIN32 stub. And if it weren't for poll() it could be just moved out of this block. Now it needs a dummy implementation, otherwise linking on WIN32 would fail.
+ #else /* WIN32 */ int virSocketSendFD(int sock G_GNUC_UNUSED, int fd G_GNUC_UNUSED) @@ -496,4 +605,11 @@ virSocketRecvFD(int sock G_GNUC_UNUSED, int fdflags G_GNUC_UNUSED) errno = ENOSYS; return -1; } + +int +virSocketSendMsgWithFDs(int sock, const char *payload, int *fds, size_t fds_len) +{ + errno = ENOSYS; + return -1; +}
Since the original virSocketSendMsgWithFDs() reports an error this stub must too. Reason? Imagine a caller: if (virSocketSendMsgWithFDs(...) < 0) { /* Should an error be reported here? */ return -1; } The answer depends on which variant of the function was called. And yeah, we could check whether there's an error set, but way easier is to have both version behave the same in this respect.
#endif /* WIN32 */ diff --git a/src/util/virsocket.h b/src/util/virsocket.h index 419da8b3ae..00fbf52603 100644 --- a/src/util/virsocket.h +++ b/src/util/virsocket.h @@ -22,6 +22,9 @@
int virSocketSendFD(int sock, int fd); int virSocketRecvFD(int sock, int fdflags); +int virSocketSendMsgWithFDs(int sock, const char *payload, int *fds, + size_t fd_len); +char * virSocketRecv(int sock);
#ifdef WIN32
Michal

This capability checks if ch can receive multiple fds along with net-add api. This capability is required to enable multiple queues for domain/guest interfaces. Signed-off-by: Praveen K Paladugu <prapal@linux.microsoft.com> --- src/ch/ch_capabilities.c | 9 +++++++++ src/ch/ch_capabilities.h | 1 + 2 files changed, 10 insertions(+) diff --git a/src/ch/ch_capabilities.c b/src/ch/ch_capabilities.c index b10485820c..b9e62d2d5b 100644 --- a/src/ch/ch_capabilities.c +++ b/src/ch/ch_capabilities.c @@ -50,6 +50,15 @@ virCHCapsInitCHVersionCaps(int version) if (version >= 18000000) virCHCapsSet(chCaps, CH_SERIAL_CONSOLE_IN_PARALLEL); + /* Starting Version 22, add-net api can accept multiple FDs in the request + * This is required to be able to configure queues for virtio-net devices + * from libvirt. + * This capability will be used to gate networking support for ch guests. + * https://github.com/cloud-hypervisor/cloud-hypervisor/releases/tag/v22.0 + */ + if (version >= 22000000) + virCHCapsSet(chCaps, CH_MULTIFD_IN_ADDNET); + return g_steal_pointer(&chCaps); } diff --git a/src/ch/ch_capabilities.h b/src/ch/ch_capabilities.h index 703a6dbfe2..ffb8881a11 100644 --- a/src/ch/ch_capabilities.h +++ b/src/ch/ch_capabilities.h @@ -26,6 +26,7 @@ typedef enum { /* 0 */ CH_KERNEL_API_DEPRCATED, /* Use `payload` in place of `kernel` api */ CH_SERIAL_CONSOLE_IN_PARALLEL, /* Serial and Console ports can work in parallel */ + CH_MULTIFD_IN_ADDNET, /* Cloud-hypervisor can accept multiple FDs in add-net api */ CH_CAPS_LAST /* this must always be the last item */ } virCHCapsFlags; -- 2.43.0

enable VIR_DOMAIN_NET_TYPE_ETHERNET network support for ch guests. Tested with following interface config: <interface type='ethernet'> <target dev='chtap0' managed="yes"/> <model type='virtio'/> <driver queues='2'/> <interface> Signed-off-by: Praveen K Paladugu <prapal@linux.microsoft.com> --- po/POTFILES | 1 + src/ch/ch_conf.h | 4 + src/ch/ch_domain.c | 41 ++++++++ src/ch/ch_domain.h | 3 + src/ch/ch_interface.c | 100 ++++++++++++++++++++ src/ch/ch_interface.h | 35 +++++++ src/ch/ch_monitor.c | 213 ++++++++++++++++-------------------------- src/ch/ch_monitor.h | 7 +- src/ch/ch_process.c | 166 +++++++++++++++++++++++++++++++- src/ch/meson.build | 2 + 10 files changed, 430 insertions(+), 142 deletions(-) create mode 100644 src/ch/ch_interface.c create mode 100644 src/ch/ch_interface.h diff --git a/po/POTFILES b/po/POTFILES index b594a8dd39..e48b9023e2 100644 --- a/po/POTFILES +++ b/po/POTFILES @@ -21,6 +21,7 @@ src/bhyve/bhyve_process.c src/ch/ch_conf.c src/ch/ch_domain.c src/ch/ch_driver.c +src/ch/ch_interface.c src/ch/ch_monitor.c src/ch/ch_process.c src/conf/backup_conf.c diff --git a/src/ch/ch_conf.h b/src/ch/ch_conf.h index 5b9b42540d..579eca894e 100644 --- a/src/ch/ch_conf.h +++ b/src/ch/ch_conf.h @@ -23,6 +23,7 @@ #include "virdomainobjlist.h" #include "virthread.h" #include "ch_capabilities.h" +#include "virebtables.h" #define CH_DRIVER_NAME "CH" #define CH_CMD "cloud-hypervisor" @@ -75,6 +76,9 @@ struct _virCHDriver /* pid file FD, ensures two copies of the driver can't use the same root */ int lockFD; + + /* Immutable pointer, lockless APIs. Pointless abstraction */ + ebtablesContext *ebtables; }; virCaps *virCHDriverCapsInit(void); diff --git a/src/ch/ch_domain.c b/src/ch/ch_domain.c index c0d6c75b1d..25610a9459 100644 --- a/src/ch/ch_domain.c +++ b/src/ch/ch_domain.c @@ -22,6 +22,7 @@ #include "ch_domain.h" #include "domain_driver.h" +#include "domain_validate.h" #include "virchrdev.h" #include "virlog.h" #include "virtime.h" @@ -355,3 +356,43 @@ virCHDomainObjFromDomain(virDomainPtr domain) return vm; } + +int +virCHDomainValidateActualNetDef(virDomainNetDef *net) +{ + virDomainNetType actualType = virDomainNetGetActualType(net); + + /* hypervisor-agnostic validation */ + if (virDomainActualNetDefValidate(net) < 0) + return -1; + + /* CH specific validation */ + switch (actualType) { + case VIR_DOMAIN_NET_TYPE_ETHERNET: + if (net->guestIP.nips > 1) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", + _("ethernet type supports a single guest ip")); + return -1; + } + break; + case VIR_DOMAIN_NET_TYPE_VHOSTUSER: + case VIR_DOMAIN_NET_TYPE_BRIDGE: + case VIR_DOMAIN_NET_TYPE_NETWORK: + case VIR_DOMAIN_NET_TYPE_DIRECT: + case VIR_DOMAIN_NET_TYPE_USER: + case VIR_DOMAIN_NET_TYPE_SERVER: + case VIR_DOMAIN_NET_TYPE_CLIENT: + case VIR_DOMAIN_NET_TYPE_MCAST: + case VIR_DOMAIN_NET_TYPE_INTERNAL: + case VIR_DOMAIN_NET_TYPE_HOSTDEV: + case VIR_DOMAIN_NET_TYPE_UDP: + case VIR_DOMAIN_NET_TYPE_VDPA: + case VIR_DOMAIN_NET_TYPE_NULL: + case VIR_DOMAIN_NET_TYPE_VDS: + case VIR_DOMAIN_NET_TYPE_LAST: + default: + break; + } + + return 0; +} diff --git a/src/ch/ch_domain.h b/src/ch/ch_domain.h index 4990914e9f..8dea2b2123 100644 --- a/src/ch/ch_domain.h +++ b/src/ch/ch_domain.h @@ -75,3 +75,6 @@ virCHDomainGetMachineName(virDomainObj *vm); virDomainObj * virCHDomainObjFromDomain(virDomainPtr domain); + +int +virCHDomainValidateActualNetDef(virDomainNetDef *net); diff --git a/src/ch/ch_interface.c b/src/ch/ch_interface.c new file mode 100644 index 0000000000..ba90103c4e --- /dev/null +++ b/src/ch/ch_interface.c @@ -0,0 +1,100 @@ + +/* + * Copyright Microsoft Corp. 2023 + * + * ch_interface.c: methods to connect guest interfaces to appropriate host + * backends + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library. If not, see + * <http://www.gnu.org/licenses/>. + */ + +#include <config.h> + +#include "domain_conf.h" +#include "domain_interface.h" +#include "virebtables.h" +#include "viralloc.h" +#include "ch_interface.h" +#include "virjson.h" +#include "virlog.h" + + +#define VIR_FROM_THIS VIR_FROM_CH + +VIR_LOG_INIT("ch.ch_interface"); + +/** + * virCHConnetNetworkInterfaces: + * @driver: pointer to ch driver object + * @vm: pointer to domain definition + * @net: pointer to a guest net + * @nicindexes: returned array of FDs of guest interfaces + * @nnicindexes: returned number of guest interfaces + * + * + * Returns 0 on success, -1 on error. + */ +int +virCHConnetNetworkInterfaces(virCHDriver *driver, + virDomainDef *vm, + virDomainNetDef *net, + int *tapfds, int **nicindexes, size_t *nnicindexes) +{ + virDomainNetType actualType = virDomainNetGetActualType(net); + + + switch (actualType) { + case VIR_DOMAIN_NET_TYPE_ETHERNET: + + if (virDomainInterfaceEthernetConnect(vm, net, + driver->ebtables, false, + driver->privileged, tapfds, + net->driver.virtio.queues) < 0) + return -1; + + G_GNUC_FALLTHROUGH; + case VIR_DOMAIN_NET_TYPE_NETWORK: + case VIR_DOMAIN_NET_TYPE_BRIDGE: + case VIR_DOMAIN_NET_TYPE_DIRECT: + if (nicindexes && nnicindexes && net->ifname) { + int nicindex = 0; + + if (virNetDevGetIndex(net->ifname, &nicindex) < 0) + return -1; + + VIR_APPEND_ELEMENT(*nicindexes, *nnicindexes, nicindex); + } + + break; + case VIR_DOMAIN_NET_TYPE_USER: + case VIR_DOMAIN_NET_TYPE_SERVER: + case VIR_DOMAIN_NET_TYPE_CLIENT: + case VIR_DOMAIN_NET_TYPE_MCAST: + case VIR_DOMAIN_NET_TYPE_VHOSTUSER: + case VIR_DOMAIN_NET_TYPE_INTERNAL: + case VIR_DOMAIN_NET_TYPE_HOSTDEV: + case VIR_DOMAIN_NET_TYPE_UDP: + case VIR_DOMAIN_NET_TYPE_VDPA: + case VIR_DOMAIN_NET_TYPE_NULL: + case VIR_DOMAIN_NET_TYPE_VDS: + case VIR_DOMAIN_NET_TYPE_LAST: + default: + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, + _("Unsupported Network type %1$d"), actualType); + return -1; + } + + return 0; +} diff --git a/src/ch/ch_interface.h b/src/ch/ch_interface.h new file mode 100644 index 0000000000..df87d4511f --- /dev/null +++ b/src/ch/ch_interface.h @@ -0,0 +1,35 @@ +/* + * Copyright Microsoft Corp. 2023 + * + * ch_interface.c: methods to connect guest interfaces to appropriate host + * backends + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library. If not, see + * <http://www.gnu.org/licenses/>. + */ + +#pragma once + + +#include "ch_conf.h" +#include "virconftypes.h" + + +int +virCHConnetNetworkInterfaces(virCHDriver *driver, + virDomainDef *vmdef, + virDomainNetDef *netdef, + int *tapfds, + int **nicindexes, + size_t *nnicindexes); diff --git a/src/ch/ch_monitor.c b/src/ch/ch_monitor.c index 6f960c3a51..12f2468cc7 100644 --- a/src/ch/ch_monitor.c +++ b/src/ch/ch_monitor.c @@ -24,7 +24,11 @@ #include <unistd.h> #include <curl/curl.h> +#include "datatypes.h" +#include "ch_conf.h" +#include "ch_interface.h" #include "ch_monitor.h" +#include "domain_interface.h" #include "viralloc.h" #include "vircommand.h" #include "virerror.h" @@ -258,148 +262,98 @@ virCHMonitorBuildDisksJson(virJSONValue *content, virDomainDef *vmdef) return 0; } -static int -virCHMonitorBuildNetJson(virJSONValue *nets, - virDomainNetDef *netdef, - size_t *nnicindexes, - int **nicindexes) +/** + * virCHMonitorBuildNetJson: + * @net: pointer to a guest network definition + * @jsonstr: returned network json + * + * Build net json to send to CH + * Returns 0 on success or -1 in case of error + */ +int +virCHMonitorBuildNetJson(virDomainNetDef *net, char **jsonstr) { - virDomainNetType netType = virDomainNetGetActualType(netdef); char macaddr[VIR_MAC_STRING_BUFLEN]; - g_autoptr(virJSONValue) net = NULL; - - // check net type at first - net = virJSONValueNewObject(); - - switch (netType) { - case VIR_DOMAIN_NET_TYPE_ETHERNET: - if (netdef->guestIP.nips == 1) { - const virNetDevIPAddr *ip = netdef->guestIP.ips[0]; - g_autofree char *addr = NULL; - virSocketAddr netmask; - g_autofree char *netmaskStr = NULL; - - if (!(addr = virSocketAddrFormat(&ip->address))) - return -1; - if (virJSONValueObjectAppendString(net, "ip", addr) < 0) - return -1; - - if (virSocketAddrPrefixToNetmask(ip->prefix, &netmask, AF_INET) < 0) { - virReportError(VIR_ERR_INTERNAL_ERROR, - _("Failed to translate net prefix %1$d to netmask"), - ip->prefix); - return -1; - } - if (!(netmaskStr = virSocketAddrFormat(&netmask))) - return -1; - if (virJSONValueObjectAppendString(net, "mask", netmaskStr) < 0) - return -1; - } else if (netdef->guestIP.nips > 1) { - virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", - _("ethernet type supports a single guest ip")); - } + g_autoptr(virJSONValue) net_json = virJSONValueNewObject(); + virDomainNetType actualType = virDomainNetGetActualType(net); - /* network and bridge use a tap device, and direct uses a - * macvtap device - */ - if (nicindexes && nnicindexes && netdef->ifname) { - int nicindex = 0; - if (virNetDevGetIndex(netdef->ifname, &nicindex) < 0) - return -1; - VIR_APPEND_ELEMENT(*nicindexes, *nnicindexes, nicindex); - } - break; - case VIR_DOMAIN_NET_TYPE_VHOSTUSER: - if ((virDomainChrType)netdef->data.vhostuser->type != VIR_DOMAIN_CHR_TYPE_UNIX) { - virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", - _("vhost_user type support UNIX socket in this CH")); - return -1; - } else { - if (virJSONValueObjectAppendString(net, "vhost_socket", netdef->data.vhostuser->data.nix.path) < 0) - return -1; - if (virJSONValueObjectAppendBoolean(net, "vhost_user", true) < 0) - return -1; - } - break; - case VIR_DOMAIN_NET_TYPE_BRIDGE: - case VIR_DOMAIN_NET_TYPE_NETWORK: - case VIR_DOMAIN_NET_TYPE_DIRECT: - case VIR_DOMAIN_NET_TYPE_USER: - case VIR_DOMAIN_NET_TYPE_SERVER: - case VIR_DOMAIN_NET_TYPE_CLIENT: - case VIR_DOMAIN_NET_TYPE_MCAST: - case VIR_DOMAIN_NET_TYPE_INTERNAL: - case VIR_DOMAIN_NET_TYPE_HOSTDEV: - case VIR_DOMAIN_NET_TYPE_UDP: - case VIR_DOMAIN_NET_TYPE_VDPA: - case VIR_DOMAIN_NET_TYPE_NULL: - case VIR_DOMAIN_NET_TYPE_VDS: - case VIR_DOMAIN_NET_TYPE_LAST: - default: - virReportEnumRangeError(virDomainNetType, netType); - return -1; - } - - if (netdef->ifname != NULL) { - if (virJSONValueObjectAppendString(net, "tap", netdef->ifname) < 0) - return -1; - } - if (virJSONValueObjectAppendString(net, "mac", virMacAddrFormat(&netdef->mac, macaddr)) < 0) - return -1; + if (actualType == VIR_DOMAIN_NET_TYPE_ETHERNET) { + const virNetDevIPAddr *ip; + g_autofree char *addr = NULL; + virSocketAddr netmask; + g_autofree char *netmaskStr = NULL; + + /* nips should be 1, this is already validated in + virCHDomainValidateActualNetDef */ + if (net->guestIP.nips != 1) + return -1; + ip = net->guestIP.ips[0]; - if (netdef->virtio != NULL) { - if (netdef->virtio->iommu == VIR_TRISTATE_SWITCH_ON) { - if (virJSONValueObjectAppendBoolean(net, "iommu", true) < 0) - return -1; - } - } - if (netdef->driver.virtio.queues) { - if (virJSONValueObjectAppendNumberInt(net, "num_queues", netdef->driver.virtio.queues) < 0) + + if (!(addr = virSocketAddrFormat(&ip->address))) return -1; - } - if (netdef->driver.virtio.rx_queue_size || netdef->driver.virtio.tx_queue_size) { - if (netdef->driver.virtio.rx_queue_size != netdef->driver.virtio.tx_queue_size) { - virReportError(VIR_ERR_CONFIG_UNSUPPORTED, - _("virtio rx_queue_size option %1$d is not same with tx_queue_size %2$d"), - netdef->driver.virtio.rx_queue_size, - netdef->driver.virtio.tx_queue_size); + if (virJSONValueObjectAppendString(net_json, "ip", addr) < 0) + return -1; + + if (virSocketAddrPrefixToNetmask(ip->prefix, &netmask, AF_INET) < 0) { + virReportError(VIR_ERR_INTERNAL_ERROR, + _("Failed to translate net prefix %1$d to netmask"), + ip->prefix); return -1; } - if (virJSONValueObjectAppendNumberInt(net, "queue_size", netdef->driver.virtio.rx_queue_size) < 0) + + if (!(netmaskStr = virSocketAddrFormat(&netmask))) + return -1; + if (virJSONValueObjectAppendString(net_json, "mask", netmaskStr) < 0) return -1; } - if (virJSONValueArrayAppend(nets, &net) < 0) + if (virJSONValueObjectAppendString(net_json, "mac", + virMacAddrFormat(&net->mac, macaddr)) < 0) return -1; - return 0; -} - -static int -virCHMonitorBuildNetsJson(virJSONValue *content, - virDomainDef *vmdef, - size_t *nnicindexes, - int **nicindexes) -{ - g_autoptr(virJSONValue) nets = NULL; - size_t i; + if (net->virtio != NULL) { + if (net->virtio->iommu == VIR_TRISTATE_SWITCH_ON) { + if (virJSONValueObjectAppendBoolean(net_json, "iommu", true) < 0) + return -1; + } + } - if (vmdef->nnets > 0) { - nets = virJSONValueNewArray(); + /* Cloud-Hypervisor expects number of queues. 1 for rx and 1 for tx. + * Multiply queue pairs by 2 to provide total number of queues to CH + */ + if (net->driver.virtio.queues) { + if (virJSONValueObjectAppendNumberInt(net_json, "num_queues", + 2 * net->driver.virtio.queues) < 0) + return -1; + } - for (i = 0; i < vmdef->nnets; i++) { - if (virCHMonitorBuildNetJson(nets, vmdef->nets[i], - nnicindexes, nicindexes) < 0) - return -1; + if (net->driver.virtio.rx_queue_size || net->driver.virtio.tx_queue_size) { + if (net->driver.virtio.rx_queue_size != + net->driver.virtio.tx_queue_size) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, + _("virtio rx_queue_size option %1$d is not same with tx_queue_size %2$d"), + net->driver.virtio.rx_queue_size, + net->driver.virtio.tx_queue_size); + return -1; } - if (virJSONValueObjectAppend(content, "net", &nets) < 0) + if (virJSONValueObjectAppendNumberInt(net_json, "queue_size", + net->driver.virtio.rx_queue_size) < 0) + return -1; + } + + if (net->mtu) { + if (virJSONValueObjectAppendNumberInt(net_json, "mtu", net->mtu) < 0) return -1; } + if (!(*jsonstr = virJSONValueToString(net_json, false))) + return -1; + return 0; } @@ -456,11 +410,8 @@ virCHMonitorBuildDevicesJson(virJSONValue *content, } static int -virCHMonitorBuildVMJson(virCHDriver *driver, - virDomainDef *vmdef, - char **jsonstr, - size_t *nnicindexes, - int **nicindexes) +virCHMonitorBuildVMJson(virCHDriver *driver, virDomainDef *vmdef, + char **jsonstr) { g_autoptr(virJSONValue) content = virJSONValueNewObject(); @@ -490,10 +441,6 @@ virCHMonitorBuildVMJson(virCHDriver *driver, if (virCHMonitorBuildDisksJson(content, vmdef) < 0) return -1; - - if (virCHMonitorBuildNetsJson(content, vmdef, nnicindexes, nicindexes) < 0) - return -1; - if (virCHMonitorBuildDevicesJson(content, vmdef) < 0) return -1; @@ -877,10 +824,7 @@ virCHMonitorShutdownVMM(virCHMonitor *mon) } int -virCHMonitorCreateVM(virCHDriver *driver, - virCHMonitor *mon, - size_t *nnicindexes, - int **nicindexes) +virCHMonitorCreateVM(virCHDriver *driver, virCHMonitor *mon) { g_autofree char *url = NULL; int responseCode = 0; @@ -892,8 +836,7 @@ virCHMonitorCreateVM(virCHDriver *driver, headers = curl_slist_append(headers, "Accept: application/json"); headers = curl_slist_append(headers, "Content-Type: application/json"); - if (virCHMonitorBuildVMJson(driver, mon->vm->def, &payload, - nnicindexes, nicindexes) != 0) + if (virCHMonitorBuildVMJson(driver, mon->vm->def, &payload) != 0) return -1; VIR_WITH_OBJECT_LOCK_GUARD(mon) { diff --git a/src/ch/ch_monitor.h b/src/ch/ch_monitor.h index bbfa77cdff..47b4e7abbd 100644 --- a/src/ch/ch_monitor.h +++ b/src/ch/ch_monitor.h @@ -104,10 +104,7 @@ void virCHMonitorClose(virCHMonitor *mon); G_DEFINE_AUTOPTR_CLEANUP_FUNC(virCHMonitor, virCHMonitorClose); -int virCHMonitorCreateVM(virCHDriver *driver, - virCHMonitor *mon, - size_t *nnicindexes, - int **nicindexes); +int virCHMonitorCreateVM(virCHDriver *driver, virCHMonitor *mon); int virCHMonitorBootVM(virCHMonitor *mon); int virCHMonitorShutdownVM(virCHMonitor *mon); int virCHMonitorRebootVM(virCHMonitor *mon); @@ -123,3 +120,5 @@ size_t virCHMonitorGetThreadInfo(virCHMonitor *mon, bool refresh, virCHMonitorThreadInfo **threads); int virCHMonitorGetIOThreads(virCHMonitor *mon, virDomainIOThreadInfo ***iothreads); +int +virCHMonitorBuildNetJson(virDomainNetDef *netdef, char **jsonstr); diff --git a/src/ch/ch_process.c b/src/ch/ch_process.c index f3bb4a7280..5bf24932d0 100644 --- a/src/ch/ch_process.c +++ b/src/ch/ch_process.c @@ -27,9 +27,13 @@ #include "ch_monitor.h" #include "ch_process.h" #include "domain_cgroup.h" +#include "domain_interface.h" #include "virerror.h" +#include "virfile.h" #include "virjson.h" #include "virlog.h" +#include "virstring.h" +#include "ch_interface.h" #define VIR_FROM_THIS VIR_FROM_CH @@ -448,13 +452,148 @@ virCHProcessSetupVcpus(virDomainObj *vm) return 0; } +/** + * chProcessAddNetworkDevices: + * @driver: pointer to ch driver object + * @mon: pointer to the monitor object + * @vmdef: pointer to domain definition + * @nicindexes: returned array of FDs of guest interfaces + * @nnicindexes: returned number of network indexes + * + * Send tap fds to CH process via AddNet api. Capture the network indexes of + * guest interfaces in nicindexes. + * + * Returns 0 on success, -1 on error. + */ +static int +chProcessAddNetworkDevices(virCHDriver *driver, + virCHMonitor *mon, + virDomainDef *vmdef, + int **nicindexes, + size_t *nnicindexes) +{ + size_t i, j, tapfd_len; + int mon_sockfd, http_res, ret; + g_autofree int *tapfds = NULL; + g_autoptr(virJSONValue) net = NULL; + struct sockaddr_un server_addr; + g_auto(virBuffer) buf = VIR_BUFFER_INITIALIZER; + g_auto(virBuffer) http_headers = VIR_BUFFER_INITIALIZER; + + if (!virBitmapIsBitSet(driver->chCaps, CH_MULTIFD_IN_ADDNET)) { + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("Guest networking is not supported by this version of ch")); + return -1; + } + + mon_sockfd = socket(AF_UNIX, SOCK_STREAM, 0); + if (mon_sockfd < 0) { + virReportSystemError(errno, "%s", _("Failed to open a UNIX socket")); + return -1; + } + + memset(&server_addr, 0, sizeof(server_addr)); + server_addr.sun_family = AF_UNIX; + if (virStrcpyStatic(server_addr.sun_path, mon->socketpath) < 0) { + virReportError(VIR_ERR_INTERNAL_ERROR, + _("UNIX socket path '%1$s' too long"), mon->socketpath); + goto err; + } + + if (connect(mon_sockfd, (struct sockaddr *)&server_addr, + sizeof(server_addr)) == -1) { + virReportSystemError(errno, "%s", _("Failed to connect to mon socket")); + goto err; + } + + virBufferAddLit(&http_headers, "PUT /api/v1/vm.add-net HTTP/1.1\r\n"); + virBufferAddLit(&http_headers, "Host: localhost\r\n"); + virBufferAddLit(&http_headers, "Content-Type: application/json\r\n"); + + for (i = 0; i < vmdef->nnets; i++) { + g_autofree char *payload = NULL; + g_autofree char *response = NULL; + + if (vmdef->nets[i]->driver.virtio.queues == 0) { + /* "queues" here refers to queue pairs. When 0, initialize + * queue pairs to 1. + */ + vmdef->nets[i]->driver.virtio.queues = 1; + } + tapfd_len = vmdef->nets[i]->driver.virtio.queues; + + if (virCHDomainValidateActualNetDef(vmdef->nets[i]) < 0) { + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("net definition failed validation")); + goto err; + } + + tapfds = g_new0(int, tapfd_len); + memset(tapfds, -1, (tapfd_len) * sizeof(int)); + + /* Connect Guest interfaces */ + if (virCHConnetNetworkInterfaces(driver, vmdef, vmdef->nets[i], tapfds, + nicindexes, nnicindexes) < 0) + goto err; + + if (virCHMonitorBuildNetJson(vmdef->nets[i], &payload) < 0) { + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("Failed to build net json")); + goto err; + } + + VIR_DEBUG("payload sent with net-add request to CH = %s", payload); + + virBufferAsprintf(&buf, "%s", virBufferCurrentContent(&http_headers)); + virBufferAsprintf(&buf, "Content-Length: %ld\r\n\r\n", strlen(payload)); + virBufferAsprintf(&buf, "%s", payload); + payload = virBufferContentAndReset(&buf); + + if (virSocketSendMsgWithFDs(mon_sockfd, payload, tapfds, tapfd_len) < 0) { + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("Failed to send net-add request to CH")); + goto err; + } + + /* Close sent tap fds in Libvirt, as they have been dup()ed in CH */ + for (j = 0; j < tapfd_len; j++) { + VIR_FORCE_CLOSE(tapfds[j]); + } + + /* Process the response from CH */ + response = virSocketRecv(mon_sockfd); + if (response == NULL) { + VIR_ERROR(_("Failed while receiving response from CH")); + goto err; + } + + /* Parse the HTTP response code */ + ret = sscanf(response, "HTTP/1.%*d %d", &http_res); + if (ret != 1) { + VIR_ERROR(_("Failed to parse HTTP response code")); + goto err; + } + if (http_res != 204 && http_res != 200) { + VIR_ERROR(_("Unexpected response from CH: %1$d"), http_res); + goto err; + } + } + + VIR_FORCE_CLOSE(mon_sockfd); + return 0; + + err: + VIR_FORCE_CLOSE(mon_sockfd); + return -1; +} + /** * virCHProcessStart: * @driver: pointer to driver structure * @vm: pointer to virtual machine structure * @reason: reason for switching vm to running state * - * Starts Cloud-Hypervisor listen on a local socket + * Starts Cloud-Hypervisor listening on a local socket * * Returns 0 on success or -1 in case of error */ @@ -483,8 +622,7 @@ virCHProcessStart(virCHDriver *driver, goto cleanup; } - if (virCHMonitorCreateVM(driver, priv->monitor, - &nnicindexes, &nicindexes) < 0) { + if (virCHMonitorCreateVM(driver, priv->monitor) < 0) { virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _("failed to create guest VM")); goto cleanup; @@ -495,6 +633,13 @@ virCHProcessStart(virCHDriver *driver, vm->def->id = vm->pid; priv->machineName = virCHDomainGetMachineName(vm); + if (chProcessAddNetworkDevices(driver, priv->monitor, vm->def, + &nicindexes, &nnicindexes) < 0) { + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("Failed while adding guest interfaces")); + goto cleanup; + } + if (virDomainCgroupSetupCgroup("ch", vm, nnicindexes, nicindexes, &priv->cgroup, @@ -507,6 +652,10 @@ virCHProcessStart(virCHDriver *driver, if (virCHProcessInitCpuAffinity(vm) < 0) goto cleanup; + /* Bring up netdevs before starting CPUs */ + if (virDomainInterfaceStartDevices(vm->def) < 0) + return -1; + if (virCHMonitorBootVM(priv->monitor) < 0) { virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _("failed to boot guest VM")); @@ -552,6 +701,9 @@ virCHProcessStop(virCHDriver *driver G_GNUC_UNUSED, int ret; int retries = 0; virCHDomainObjPrivate *priv = vm->privateData; + virCHDriverConfig *cfg = virCHDriverGetConfig(driver); + virDomainDef *def = vm->def; + size_t i; VIR_DEBUG("Stopping VM name=%s pid=%d reason=%d", vm->def->name, (int)vm->pid, (int)reason); @@ -560,6 +712,14 @@ virCHProcessStop(virCHDriver *driver G_GNUC_UNUSED, g_clear_pointer(&priv->monitor, virCHMonitorClose); } + /* de-activate netdevs after stopping vm */ + ignore_value(virDomainInterfaceStopDevices(vm->def)); + + for (i = 0; i < def->nnets; i++) { + virDomainNetDef *net = def->nets[i]; + virDomainInterfaceDeleteDevice(def, net, false, cfg->stateDir); + } + retry: if ((ret = virDomainCgroupRemoveCgroup(vm, priv->cgroup, diff --git a/src/ch/meson.build b/src/ch/meson.build index 6c8bf9c43f..633966aac7 100644 --- a/src/ch/meson.build +++ b/src/ch/meson.build @@ -7,6 +7,8 @@ ch_driver_sources = [ 'ch_domain.h', 'ch_driver.c', 'ch_driver.h', + 'ch_interface.c', + 'ch_interface.h', 'ch_monitor.c', 'ch_monitor.h', 'ch_process.c', -- 2.43.0

On 1/16/24 22:25, Praveen K Paladugu wrote:
enable VIR_DOMAIN_NET_TYPE_ETHERNET network support for ch guests.
Tested with following interface config:
<interface type='ethernet'> <target dev='chtap0' managed="yes"/> <model type='virtio'/> <driver queues='2'/> <interface>
Signed-off-by: Praveen K Paladugu <prapal@linux.microsoft.com> --- po/POTFILES | 1 + src/ch/ch_conf.h | 4 + src/ch/ch_domain.c | 41 ++++++++ src/ch/ch_domain.h | 3 + src/ch/ch_interface.c | 100 ++++++++++++++++++++ src/ch/ch_interface.h | 35 +++++++ src/ch/ch_monitor.c | 213 ++++++++++++++++-------------------------- src/ch/ch_monitor.h | 7 +- src/ch/ch_process.c | 166 +++++++++++++++++++++++++++++++- src/ch/meson.build | 2 + 10 files changed, 430 insertions(+), 142 deletions(-) create mode 100644 src/ch/ch_interface.c create mode 100644 src/ch/ch_interface.h
diff --git a/po/POTFILES b/po/POTFILES index b594a8dd39..e48b9023e2 100644 --- a/po/POTFILES +++ b/po/POTFILES @@ -21,6 +21,7 @@ src/bhyve/bhyve_process.c src/ch/ch_conf.c src/ch/ch_domain.c src/ch/ch_driver.c +src/ch/ch_interface.c src/ch/ch_monitor.c src/ch/ch_process.c src/conf/backup_conf.c diff --git a/src/ch/ch_conf.h b/src/ch/ch_conf.h index 5b9b42540d..579eca894e 100644 --- a/src/ch/ch_conf.h +++ b/src/ch/ch_conf.h @@ -23,6 +23,7 @@ #include "virdomainobjlist.h" #include "virthread.h" #include "ch_capabilities.h" +#include "virebtables.h"
#define CH_DRIVER_NAME "CH" #define CH_CMD "cloud-hypervisor" @@ -75,6 +76,9 @@ struct _virCHDriver
/* pid file FD, ensures two copies of the driver can't use the same root */ int lockFD; + + /* Immutable pointer, lockless APIs. Pointless abstraction */ + ebtablesContext *ebtables; };
virCaps *virCHDriverCapsInit(void); diff --git a/src/ch/ch_domain.c b/src/ch/ch_domain.c index c0d6c75b1d..25610a9459 100644 --- a/src/ch/ch_domain.c +++ b/src/ch/ch_domain.c @@ -22,6 +22,7 @@
#include "ch_domain.h" #include "domain_driver.h" +#include "domain_validate.h" #include "virchrdev.h" #include "virlog.h" #include "virtime.h" @@ -355,3 +356,43 @@ virCHDomainObjFromDomain(virDomainPtr domain)
return vm; } + +int +virCHDomainValidateActualNetDef(virDomainNetDef *net) +{ + virDomainNetType actualType = virDomainNetGetActualType(net); + + /* hypervisor-agnostic validation */ + if (virDomainActualNetDefValidate(net) < 0) + return -1; + + /* CH specific validation */ + switch (actualType) { + case VIR_DOMAIN_NET_TYPE_ETHERNET: + if (net->guestIP.nips > 1) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", + _("ethernet type supports a single guest ip")); + return -1; + } + break; + case VIR_DOMAIN_NET_TYPE_VHOSTUSER: + case VIR_DOMAIN_NET_TYPE_BRIDGE: + case VIR_DOMAIN_NET_TYPE_NETWORK: + case VIR_DOMAIN_NET_TYPE_DIRECT: + case VIR_DOMAIN_NET_TYPE_USER: + case VIR_DOMAIN_NET_TYPE_SERVER: + case VIR_DOMAIN_NET_TYPE_CLIENT: + case VIR_DOMAIN_NET_TYPE_MCAST: + case VIR_DOMAIN_NET_TYPE_INTERNAL: + case VIR_DOMAIN_NET_TYPE_HOSTDEV: + case VIR_DOMAIN_NET_TYPE_UDP: + case VIR_DOMAIN_NET_TYPE_VDPA: + case VIR_DOMAIN_NET_TYPE_NULL: + case VIR_DOMAIN_NET_TYPE_VDS: + case VIR_DOMAIN_NET_TYPE_LAST: + default: + break; + } + + return 0; +} diff --git a/src/ch/ch_domain.h b/src/ch/ch_domain.h index 4990914e9f..8dea2b2123 100644 --- a/src/ch/ch_domain.h +++ b/src/ch/ch_domain.h @@ -75,3 +75,6 @@ virCHDomainGetMachineName(virDomainObj *vm);
virDomainObj * virCHDomainObjFromDomain(virDomainPtr domain); + +int +virCHDomainValidateActualNetDef(virDomainNetDef *net); diff --git a/src/ch/ch_interface.c b/src/ch/ch_interface.c new file mode 100644 index 0000000000..ba90103c4e --- /dev/null +++ b/src/ch/ch_interface.c @@ -0,0 +1,100 @@ + +/* + * Copyright Microsoft Corp. 2023 + * + * ch_interface.c: methods to connect guest interfaces to appropriate host + * backends + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library. If not, see + * <http://www.gnu.org/licenses/>. + */ + +#include <config.h> + +#include "domain_conf.h" +#include "domain_interface.h" +#include "virebtables.h" +#include "viralloc.h" +#include "ch_interface.h" +#include "virjson.h" +#include "virlog.h" + + +#define VIR_FROM_THIS VIR_FROM_CH + +VIR_LOG_INIT("ch.ch_interface"); + +/** + * virCHConnetNetworkInterfaces: + * @driver: pointer to ch driver object + * @vm: pointer to domain definition + * @net: pointer to a guest net + * @nicindexes: returned array of FDs of guest interfaces + * @nnicindexes: returned number of guest interfaces + * + * + * Returns 0 on success, -1 on error. + */ +int +virCHConnetNetworkInterfaces(virCHDriver *driver, + virDomainDef *vm, + virDomainNetDef *net, + int *tapfds, int **nicindexes, size_t *nnicindexes) +{ + virDomainNetType actualType = virDomainNetGetActualType(net); + + + switch (actualType) { + case VIR_DOMAIN_NET_TYPE_ETHERNET: + + if (virDomainInterfaceEthernetConnect(vm, net, + driver->ebtables, false, + driver->privileged, tapfds, + net->driver.virtio.queues) < 0) + return -1; + + G_GNUC_FALLTHROUGH; + case VIR_DOMAIN_NET_TYPE_NETWORK: + case VIR_DOMAIN_NET_TYPE_BRIDGE: + case VIR_DOMAIN_NET_TYPE_DIRECT: + if (nicindexes && nnicindexes && net->ifname) { + int nicindex = 0; + + if (virNetDevGetIndex(net->ifname, &nicindex) < 0) + return -1; + + VIR_APPEND_ELEMENT(*nicindexes, *nnicindexes, nicindex); + } + + break; + case VIR_DOMAIN_NET_TYPE_USER: + case VIR_DOMAIN_NET_TYPE_SERVER: + case VIR_DOMAIN_NET_TYPE_CLIENT: + case VIR_DOMAIN_NET_TYPE_MCAST: + case VIR_DOMAIN_NET_TYPE_VHOSTUSER: + case VIR_DOMAIN_NET_TYPE_INTERNAL: + case VIR_DOMAIN_NET_TYPE_HOSTDEV: + case VIR_DOMAIN_NET_TYPE_UDP: + case VIR_DOMAIN_NET_TYPE_VDPA: + case VIR_DOMAIN_NET_TYPE_NULL: + case VIR_DOMAIN_NET_TYPE_VDS: + case VIR_DOMAIN_NET_TYPE_LAST: + default: + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, + _("Unsupported Network type %1$d"), actualType); + return -1; + } + + return 0; +} diff --git a/src/ch/ch_interface.h b/src/ch/ch_interface.h new file mode 100644 index 0000000000..df87d4511f --- /dev/null +++ b/src/ch/ch_interface.h @@ -0,0 +1,35 @@ +/* + * Copyright Microsoft Corp. 2023 + * + * ch_interface.c: methods to connect guest interfaces to appropriate host + * backends + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library. If not, see + * <http://www.gnu.org/licenses/>. + */ + +#pragma once + + +#include "ch_conf.h" +#include "virconftypes.h" + + +int +virCHConnetNetworkInterfaces(virCHDriver *driver, + virDomainDef *vmdef, + virDomainNetDef *netdef, + int *tapfds, + int **nicindexes, + size_t *nnicindexes); diff --git a/src/ch/ch_monitor.c b/src/ch/ch_monitor.c index 6f960c3a51..12f2468cc7 100644 --- a/src/ch/ch_monitor.c +++ b/src/ch/ch_monitor.c @@ -24,7 +24,11 @@ #include <unistd.h> #include <curl/curl.h>
+#include "datatypes.h" +#include "ch_conf.h" +#include "ch_interface.h" #include "ch_monitor.h" +#include "domain_interface.h" #include "viralloc.h" #include "vircommand.h" #include "virerror.h" @@ -258,148 +262,98 @@ virCHMonitorBuildDisksJson(virJSONValue *content, virDomainDef *vmdef) return 0; }
-static int -virCHMonitorBuildNetJson(virJSONValue *nets, - virDomainNetDef *netdef, - size_t *nnicindexes, - int **nicindexes) +/** + * virCHMonitorBuildNetJson: + * @net: pointer to a guest network definition + * @jsonstr: returned network json + * + * Build net json to send to CH + * Returns 0 on success or -1 in case of error + */ +int +virCHMonitorBuildNetJson(virDomainNetDef *net, char **jsonstr) { - virDomainNetType netType = virDomainNetGetActualType(netdef); char macaddr[VIR_MAC_STRING_BUFLEN]; - g_autoptr(virJSONValue) net = NULL; - - // check net type at first - net = virJSONValueNewObject(); - - switch (netType) { - case VIR_DOMAIN_NET_TYPE_ETHERNET: - if (netdef->guestIP.nips == 1) { - const virNetDevIPAddr *ip = netdef->guestIP.ips[0]; - g_autofree char *addr = NULL; - virSocketAddr netmask; - g_autofree char *netmaskStr = NULL; - - if (!(addr = virSocketAddrFormat(&ip->address))) - return -1; - if (virJSONValueObjectAppendString(net, "ip", addr) < 0) - return -1; - - if (virSocketAddrPrefixToNetmask(ip->prefix, &netmask, AF_INET) < 0) { - virReportError(VIR_ERR_INTERNAL_ERROR, - _("Failed to translate net prefix %1$d to netmask"), - ip->prefix); - return -1; - } - if (!(netmaskStr = virSocketAddrFormat(&netmask))) - return -1; - if (virJSONValueObjectAppendString(net, "mask", netmaskStr) < 0) - return -1; - } else if (netdef->guestIP.nips > 1) { - virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", - _("ethernet type supports a single guest ip")); - } + g_autoptr(virJSONValue) net_json = virJSONValueNewObject(); + virDomainNetType actualType = virDomainNetGetActualType(net);
- /* network and bridge use a tap device, and direct uses a - * macvtap device - */ - if (nicindexes && nnicindexes && netdef->ifname) { - int nicindex = 0;
- if (virNetDevGetIndex(netdef->ifname, &nicindex) < 0) - return -1;
- VIR_APPEND_ELEMENT(*nicindexes, *nnicindexes, nicindex); - } - break; - case VIR_DOMAIN_NET_TYPE_VHOSTUSER: - if ((virDomainChrType)netdef->data.vhostuser->type != VIR_DOMAIN_CHR_TYPE_UNIX) { - virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", - _("vhost_user type support UNIX socket in this CH")); - return -1; - } else { - if (virJSONValueObjectAppendString(net, "vhost_socket", netdef->data.vhostuser->data.nix.path) < 0) - return -1; - if (virJSONValueObjectAppendBoolean(net, "vhost_user", true) < 0) - return -1; - } - break; - case VIR_DOMAIN_NET_TYPE_BRIDGE: - case VIR_DOMAIN_NET_TYPE_NETWORK: - case VIR_DOMAIN_NET_TYPE_DIRECT: - case VIR_DOMAIN_NET_TYPE_USER: - case VIR_DOMAIN_NET_TYPE_SERVER: - case VIR_DOMAIN_NET_TYPE_CLIENT: - case VIR_DOMAIN_NET_TYPE_MCAST: - case VIR_DOMAIN_NET_TYPE_INTERNAL: - case VIR_DOMAIN_NET_TYPE_HOSTDEV: - case VIR_DOMAIN_NET_TYPE_UDP: - case VIR_DOMAIN_NET_TYPE_VDPA: - case VIR_DOMAIN_NET_TYPE_NULL: - case VIR_DOMAIN_NET_TYPE_VDS: - case VIR_DOMAIN_NET_TYPE_LAST: - default: - virReportEnumRangeError(virDomainNetType, netType); - return -1; - } - - if (netdef->ifname != NULL) { - if (virJSONValueObjectAppendString(net, "tap", netdef->ifname) < 0) - return -1; - } - if (virJSONValueObjectAppendString(net, "mac", virMacAddrFormat(&netdef->mac, macaddr)) < 0) - return -1; + if (actualType == VIR_DOMAIN_NET_TYPE_ETHERNET) { + const virNetDevIPAddr *ip; + g_autofree char *addr = NULL; + virSocketAddr netmask; + g_autofree char *netmaskStr = NULL; + + /* nips should be 1, this is already validated in + virCHDomainValidateActualNetDef */ + if (net->guestIP.nips != 1) + return -1;
+ ip = net->guestIP.ips[0];
- if (netdef->virtio != NULL) { - if (netdef->virtio->iommu == VIR_TRISTATE_SWITCH_ON) { - if (virJSONValueObjectAppendBoolean(net, "iommu", true) < 0) - return -1; - } - } - if (netdef->driver.virtio.queues) { - if (virJSONValueObjectAppendNumberInt(net, "num_queues", netdef->driver.virtio.queues) < 0) + + if (!(addr = virSocketAddrFormat(&ip->address))) return -1; - }
- if (netdef->driver.virtio.rx_queue_size || netdef->driver.virtio.tx_queue_size) { - if (netdef->driver.virtio.rx_queue_size != netdef->driver.virtio.tx_queue_size) { - virReportError(VIR_ERR_CONFIG_UNSUPPORTED, - _("virtio rx_queue_size option %1$d is not same with tx_queue_size %2$d"), - netdef->driver.virtio.rx_queue_size, - netdef->driver.virtio.tx_queue_size); + if (virJSONValueObjectAppendString(net_json, "ip", addr) < 0) + return -1; + + if (virSocketAddrPrefixToNetmask(ip->prefix, &netmask, AF_INET) < 0) { + virReportError(VIR_ERR_INTERNAL_ERROR, + _("Failed to translate net prefix %1$d to netmask"), + ip->prefix); return -1; } - if (virJSONValueObjectAppendNumberInt(net, "queue_size", netdef->driver.virtio.rx_queue_size) < 0) + + if (!(netmaskStr = virSocketAddrFormat(&netmask))) + return -1; + if (virJSONValueObjectAppendString(net_json, "mask", netmaskStr) < 0) return -1; }
- if (virJSONValueArrayAppend(nets, &net) < 0) + if (virJSONValueObjectAppendString(net_json, "mac", + virMacAddrFormat(&net->mac, macaddr)) < 0) return -1;
- return 0; -} - -static int -virCHMonitorBuildNetsJson(virJSONValue *content, - virDomainDef *vmdef, - size_t *nnicindexes, - int **nicindexes) -{ - g_autoptr(virJSONValue) nets = NULL; - size_t i; + if (net->virtio != NULL) { + if (net->virtio->iommu == VIR_TRISTATE_SWITCH_ON) { + if (virJSONValueObjectAppendBoolean(net_json, "iommu", true) < 0) + return -1; + } + }
- if (vmdef->nnets > 0) { - nets = virJSONValueNewArray(); + /* Cloud-Hypervisor expects number of queues. 1 for rx and 1 for tx. + * Multiply queue pairs by 2 to provide total number of queues to CH + */ + if (net->driver.virtio.queues) { + if (virJSONValueObjectAppendNumberInt(net_json, "num_queues", + 2 * net->driver.virtio.queues) < 0) + return -1; + }
- for (i = 0; i < vmdef->nnets; i++) { - if (virCHMonitorBuildNetJson(nets, vmdef->nets[i], - nnicindexes, nicindexes) < 0) - return -1; + if (net->driver.virtio.rx_queue_size || net->driver.virtio.tx_queue_size) { + if (net->driver.virtio.rx_queue_size != + net->driver.virtio.tx_queue_size) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, + _("virtio rx_queue_size option %1$d is not same with tx_queue_size %2$d"), + net->driver.virtio.rx_queue_size, + net->driver.virtio.tx_queue_size); + return -1; } - if (virJSONValueObjectAppend(content, "net", &nets) < 0) + if (virJSONValueObjectAppendNumberInt(net_json, "queue_size", + net->driver.virtio.rx_queue_size) < 0) + return -1; + } + + if (net->mtu) { + if (virJSONValueObjectAppendNumberInt(net_json, "mtu", net->mtu) < 0) return -1; }
+ if (!(*jsonstr = virJSONValueToString(net_json, false))) + return -1; + return 0; }
@@ -456,11 +410,8 @@ virCHMonitorBuildDevicesJson(virJSONValue *content, }
static int -virCHMonitorBuildVMJson(virCHDriver *driver, - virDomainDef *vmdef, - char **jsonstr, - size_t *nnicindexes, - int **nicindexes) +virCHMonitorBuildVMJson(virCHDriver *driver, virDomainDef *vmdef, + char **jsonstr) { g_autoptr(virJSONValue) content = virJSONValueNewObject();
@@ -490,10 +441,6 @@ virCHMonitorBuildVMJson(virCHDriver *driver, if (virCHMonitorBuildDisksJson(content, vmdef) < 0) return -1;
- - if (virCHMonitorBuildNetsJson(content, vmdef, nnicindexes, nicindexes) < 0) - return -1; - if (virCHMonitorBuildDevicesJson(content, vmdef) < 0) return -1;
@@ -877,10 +824,7 @@ virCHMonitorShutdownVMM(virCHMonitor *mon) }
int -virCHMonitorCreateVM(virCHDriver *driver, - virCHMonitor *mon, - size_t *nnicindexes, - int **nicindexes) +virCHMonitorCreateVM(virCHDriver *driver, virCHMonitor *mon) { g_autofree char *url = NULL; int responseCode = 0; @@ -892,8 +836,7 @@ virCHMonitorCreateVM(virCHDriver *driver, headers = curl_slist_append(headers, "Accept: application/json"); headers = curl_slist_append(headers, "Content-Type: application/json");
- if (virCHMonitorBuildVMJson(driver, mon->vm->def, &payload, - nnicindexes, nicindexes) != 0) + if (virCHMonitorBuildVMJson(driver, mon->vm->def, &payload) != 0) return -1;
VIR_WITH_OBJECT_LOCK_GUARD(mon) { diff --git a/src/ch/ch_monitor.h b/src/ch/ch_monitor.h index bbfa77cdff..47b4e7abbd 100644 --- a/src/ch/ch_monitor.h +++ b/src/ch/ch_monitor.h @@ -104,10 +104,7 @@ void virCHMonitorClose(virCHMonitor *mon); G_DEFINE_AUTOPTR_CLEANUP_FUNC(virCHMonitor, virCHMonitorClose);
-int virCHMonitorCreateVM(virCHDriver *driver, - virCHMonitor *mon, - size_t *nnicindexes, - int **nicindexes); +int virCHMonitorCreateVM(virCHDriver *driver, virCHMonitor *mon); int virCHMonitorBootVM(virCHMonitor *mon); int virCHMonitorShutdownVM(virCHMonitor *mon); int virCHMonitorRebootVM(virCHMonitor *mon); @@ -123,3 +120,5 @@ size_t virCHMonitorGetThreadInfo(virCHMonitor *mon, bool refresh, virCHMonitorThreadInfo **threads); int virCHMonitorGetIOThreads(virCHMonitor *mon, virDomainIOThreadInfo ***iothreads); +int +virCHMonitorBuildNetJson(virDomainNetDef *netdef, char **jsonstr); diff --git a/src/ch/ch_process.c b/src/ch/ch_process.c index f3bb4a7280..5bf24932d0 100644 --- a/src/ch/ch_process.c +++ b/src/ch/ch_process.c @@ -27,9 +27,13 @@ #include "ch_monitor.h" #include "ch_process.h" #include "domain_cgroup.h" +#include "domain_interface.h" #include "virerror.h" +#include "virfile.h" #include "virjson.h" #include "virlog.h" +#include "virstring.h" +#include "ch_interface.h"
#define VIR_FROM_THIS VIR_FROM_CH
@@ -448,13 +452,148 @@ virCHProcessSetupVcpus(virDomainObj *vm) return 0; }
+/** + * chProcessAddNetworkDevices: + * @driver: pointer to ch driver object + * @mon: pointer to the monitor object + * @vmdef: pointer to domain definition + * @nicindexes: returned array of FDs of guest interfaces + * @nnicindexes: returned number of network indexes + * + * Send tap fds to CH process via AddNet api. Capture the network indexes of + * guest interfaces in nicindexes. + * + * Returns 0 on success, -1 on error. + */ +static int +chProcessAddNetworkDevices(virCHDriver *driver, + virCHMonitor *mon, + virDomainDef *vmdef, + int **nicindexes, + size_t *nnicindexes) +{ + size_t i, j, tapfd_len; + int mon_sockfd, http_res, ret; + g_autofree int *tapfds = NULL; + g_autoptr(virJSONValue) net = NULL; + struct sockaddr_un server_addr; + g_auto(virBuffer) buf = VIR_BUFFER_INITIALIZER; + g_auto(virBuffer) http_headers = VIR_BUFFER_INITIALIZER;
Couple of unused variables, couple of variables used solely within the loop below. BTW: just like there's g_autofree, libvirt has its own VIR_AUTOCLOSE, which can be used like this: VIR_AUTOCLOSE mon_sockfd = -1; ... mon_sockfd = socket(...); if (mon_sockfd < 0) return -1; ... if (some_error) return -1; ... return 0; and just like you'd expect - the mon_sockfd is closed on every return path and NOT leaked. This way, you can simplify the error path which then becomes just 'return -1' at which point all 'goto err' can simply be replaced with that 'return -1'. BTW2: it's an unwritten rule, well, custom, that 'ret' is used to hold value that's eventually returned from the function. In this case it's used to hold a temporary value - retval from a called function. In such cases we use anything else, but 'ret'. 'rc' is my favorite.
+ + if (!virBitmapIsBitSet(driver->chCaps, CH_MULTIFD_IN_ADDNET)) { + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("Guest networking is not supported by this version of ch")); + return -1; + } + + mon_sockfd = socket(AF_UNIX, SOCK_STREAM, 0); + if (mon_sockfd < 0) { + virReportSystemError(errno, "%s", _("Failed to open a UNIX socket")); + return -1; + } + + memset(&server_addr, 0, sizeof(server_addr)); + server_addr.sun_family = AF_UNIX; + if (virStrcpyStatic(server_addr.sun_path, mon->socketpath) < 0) { + virReportError(VIR_ERR_INTERNAL_ERROR, + _("UNIX socket path '%1$s' too long"), mon->socketpath); + goto err; + } + + if (connect(mon_sockfd, (struct sockaddr *)&server_addr, + sizeof(server_addr)) == -1) { + virReportSystemError(errno, "%s", _("Failed to connect to mon socket")); + goto err; + } + + virBufferAddLit(&http_headers, "PUT /api/v1/vm.add-net HTTP/1.1\r\n"); + virBufferAddLit(&http_headers, "Host: localhost\r\n"); + virBufferAddLit(&http_headers, "Content-Type: application/json\r\n"); + + for (i = 0; i < vmdef->nnets; i++) { + g_autofree char *payload = NULL; + g_autofree char *response = NULL; + + if (vmdef->nets[i]->driver.virtio.queues == 0) { + /* "queues" here refers to queue pairs. When 0, initialize + * queue pairs to 1. + */ + vmdef->nets[i]->driver.virtio.queues = 1; + } + tapfd_len = vmdef->nets[i]->driver.virtio.queues; + + if (virCHDomainValidateActualNetDef(vmdef->nets[i]) < 0) { + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("net definition failed validation")); + goto err; + } + + tapfds = g_new0(int, tapfd_len); + memset(tapfds, -1, (tapfd_len) * sizeof(int)); + + /* Connect Guest interfaces */ + if (virCHConnetNetworkInterfaces(driver, vmdef, vmdef->nets[i], tapfds, + nicindexes, nnicindexes) < 0) + goto err; + + if (virCHMonitorBuildNetJson(vmdef->nets[i], &payload) < 0) { + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("Failed to build net json")); + goto err; + } + + VIR_DEBUG("payload sent with net-add request to CH = %s", payload); + + virBufferAsprintf(&buf, "%s", virBufferCurrentContent(&http_headers)); + virBufferAsprintf(&buf, "Content-Length: %ld\r\n\r\n", strlen(payload)); + virBufferAsprintf(&buf, "%s", payload); + payload = virBufferContentAndReset(&buf); + + if (virSocketSendMsgWithFDs(mon_sockfd, payload, tapfds, tapfd_len) < 0) { + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("Failed to send net-add request to CH")); + goto err; + } +
Ooops, so if sending FDs fails for whatever reason, tapfds[] are not closed.
+ /* Close sent tap fds in Libvirt, as they have been dup()ed in CH */ + for (j = 0; j < tapfd_len; j++) { + VIR_FORCE_CLOSE(tapfds[j]); + } + + /* Process the response from CH */ + response = virSocketRecv(mon_sockfd); + if (response == NULL) { + VIR_ERROR(_("Failed while receiving response from CH"));
No, VIR_ERROR() does not do what you think it does. It shouldn't really be used anywhere, but a brief moment in daemon startup when loggin is not set up yet. Here, virSocketRecv() reports an error in case of NULL, but ...
+ goto err; + } + + /* Parse the HTTP response code */ + ret = sscanf(response, "HTTP/1.%*d %d", &http_res); + if (ret != 1) { + VIR_ERROR(_("Failed to parse HTTP response code")); + goto err;
.. here, if this error is met then control jumps onto err label, this error message is printed only into the logs but not reported to the caller of API. For instance, if this was called via 'virsh start', then user would see: # virsh start myCHGuest Failed to start domain: 'myCHGuest' An error occurred, but the cause is unknown and would need to open logs only to find what went wrong.
+ } + if (http_res != 204 && http_res != 200) { + VIR_ERROR(_("Unexpected response from CH: %1$d"), http_res); + goto err; + } + } + + VIR_FORCE_CLOSE(mon_sockfd); + return 0; + + err: + VIR_FORCE_CLOSE(mon_sockfd); + return -1; +} +
Michal

On 1/16/24 22:25, Praveen K Paladugu wrote:
v2: * Refactor virSocketRecvHttpResponse to return responses without parsing http responses. * Use errno to report errors in virsocket.c * Address WIN32 build failure in virsocket.c * Fix code indentations
Praveen K Paladugu (5): conf: Drop unused parameter hypervisor: Move domain interface mgmt methods util: Add util methods required by ch networking ch: Introduce version based cap for network support ch: Enable ETHERNET Network mode support
po/POTFILES | 3 + src/ch/ch_capabilities.c | 9 + src/ch/ch_capabilities.h | 1 + src/ch/ch_conf.h | 4 + src/ch/ch_domain.c | 41 +++ src/ch/ch_domain.h | 3 + src/ch/ch_interface.c | 100 +++++++ src/ch/ch_interface.h | 35 +++ src/ch/ch_monitor.c | 213 +++++--------- src/ch/ch_monitor.h | 7 +- src/ch/ch_process.c | 166 ++++++++++- src/ch/meson.build | 2 + src/conf/domain_conf.c | 1 - src/conf/domain_conf.h | 3 +- src/hypervisor/domain_interface.c | 457 ++++++++++++++++++++++++++++++ src/hypervisor/domain_interface.h | 45 +++ src/hypervisor/meson.build | 1 + src/libvirt_private.syms | 11 + src/libxl/libxl_domain.c | 2 +- src/libxl/libxl_driver.c | 4 +- src/lxc/lxc_driver.c | 2 +- src/lxc/lxc_process.c | 4 +- src/qemu/qemu_command.c | 8 +- src/qemu/qemu_hotplug.c | 15 +- src/qemu/qemu_interface.c | 339 +--------------------- src/qemu/qemu_interface.h | 11 - src/qemu/qemu_process.c | 72 +---- src/util/virsocket.c | 116 ++++++++ src/util/virsocket.h | 3 + 29 files changed, 1107 insertions(+), 571 deletions(-) create mode 100644 src/ch/ch_interface.c create mode 100644 src/ch/ch_interface.h create mode 100644 src/hypervisor/domain_interface.c create mode 100644 src/hypervisor/domain_interface.h
Now, this is almost correct. I only have couple of suggestions. No need to resend another version. I've uploaded these patches among with my suggestions to my gitlab: https://gitlab.com/MichalPrivoznik/libvirt/-/tree/ch_networking?ref_type=hea... You'll find 'fixup' commits there - these contains fixes to those small issues I've raised in my review. I'll squash them in before merging. Then, there's one suggestion - "Possible move of virSocketRecv() into ch_process.c" - honestly, I don't feel like virSocketRecv() belongs into src/util/ but I can't explain why really. For the time being we can have it in src/ch/. I'd split this commit and squash its parts into respective commits. Please do check if code still works even with my fixups and whether you agree with suggested changes. If so, I can give my reviewed by and merge these. Michal

On Tue, Jan 30, 2024 at 06:28:05PM +0100, Michal Pr??vozn??k wrote:
On 1/16/24 22:25, Praveen K Paladugu wrote:
v2: * Refactor virSocketRecvHttpResponse to return responses without parsing http responses. * Use errno to report errors in virsocket.c * Address WIN32 build failure in virsocket.c * Fix code indentations
Praveen K Paladugu (5): conf: Drop unused parameter hypervisor: Move domain interface mgmt methods util: Add util methods required by ch networking ch: Introduce version based cap for network support ch: Enable ETHERNET Network mode support
po/POTFILES | 3 + src/ch/ch_capabilities.c | 9 + src/ch/ch_capabilities.h | 1 + src/ch/ch_conf.h | 4 + src/ch/ch_domain.c | 41 +++ src/ch/ch_domain.h | 3 + src/ch/ch_interface.c | 100 +++++++ src/ch/ch_interface.h | 35 +++ src/ch/ch_monitor.c | 213 +++++--------- src/ch/ch_monitor.h | 7 +- src/ch/ch_process.c | 166 ++++++++++- src/ch/meson.build | 2 + src/conf/domain_conf.c | 1 - src/conf/domain_conf.h | 3 +- src/hypervisor/domain_interface.c | 457 ++++++++++++++++++++++++++++++ src/hypervisor/domain_interface.h | 45 +++ src/hypervisor/meson.build | 1 + src/libvirt_private.syms | 11 + src/libxl/libxl_domain.c | 2 +- src/libxl/libxl_driver.c | 4 +- src/lxc/lxc_driver.c | 2 +- src/lxc/lxc_process.c | 4 +- src/qemu/qemu_command.c | 8 +- src/qemu/qemu_hotplug.c | 15 +- src/qemu/qemu_interface.c | 339 +--------------------- src/qemu/qemu_interface.h | 11 - src/qemu/qemu_process.c | 72 +---- src/util/virsocket.c | 116 ++++++++ src/util/virsocket.h | 3 + 29 files changed, 1107 insertions(+), 571 deletions(-) create mode 100644 src/ch/ch_interface.c create mode 100644 src/ch/ch_interface.h create mode 100644 src/hypervisor/domain_interface.c create mode 100644 src/hypervisor/domain_interface.h
Now, this is almost correct. I only have couple of suggestions. No need to resend another version. I've uploaded these patches among with my suggestions to my gitlab:
https://gitlab.com/MichalPrivoznik/libvirt/-/tree/ch_networking?ref_type=hea...
You'll find 'fixup' commits there - these contains fixes to those small issues I've raised in my review. I'll squash them in before merging.
Then, there's one suggestion - "Possible move of virSocketRecv() into ch_process.c" - honestly, I don't feel like virSocketRecv() belongs into src/util/ but I can't explain why really. For the time being we can have it in src/ch/. I'd split this commit and squash its parts into respective commits.
I don't feel strongly about keep virSocketRecv() in src/util. The method seemed generic enough to keep it in src/util. If you are not comfortable keeping it there, I am fine with moving it to src/ch.
Please do check if code still works even with my fixups and whether you agree with suggested changes. If so, I can give my reviewed by and merge
these.
Thanks for the detailed review on this patchset. I tested the patchset in above branch and I noticed 2 bugs. One introduced by me in v2, and one introduced by a fixup commit. First bug is fixed by https://github.com/praveen-pk/libvirt/commit/6c3b068957d854c0b4f51925d3e87a8.... Seems like I missed to test a domain XML without an IP address to guest interface. This patch fixes the bug. Second bug is fixed by https://github.com/praveen-pk/libvirt/commit/a7f7bcefbcc97f755b5075e52233288.... As `control` is now allocated from heap, the size is incorrectly set in `msg.msg_controllen`. This patch fixes it. I marked both these commits as `fixup2`. With these commits, the patchset looks good. Please go ahead and merge. If you'd like for me to send you a v3 with all the commit properly squashed, I am happy to do that as well. Regards, Praveen
Michal _______________________________________________ Devel mailing list -- devel@lists.libvirt.org To unsubscribe send an email to devel-leave@lists.libvirt.org

On 2/1/24 16:40, Praveen Paladugu wrote:
On Tue, Jan 30, 2024 at 06:28:05PM +0100, Michal Pr??vozn??k wrote:
On 1/16/24 22:25, Praveen K Paladugu wrote:
v2: * Refactor virSocketRecvHttpResponse to return responses without parsing http responses. * Use errno to report errors in virsocket.c * Address WIN32 build failure in virsocket.c * Fix code indentations
Praveen K Paladugu (5): conf: Drop unused parameter hypervisor: Move domain interface mgmt methods util: Add util methods required by ch networking ch: Introduce version based cap for network support ch: Enable ETHERNET Network mode support
po/POTFILES | 3 + src/ch/ch_capabilities.c | 9 + src/ch/ch_capabilities.h | 1 + src/ch/ch_conf.h | 4 + src/ch/ch_domain.c | 41 +++ src/ch/ch_domain.h | 3 + src/ch/ch_interface.c | 100 +++++++ src/ch/ch_interface.h | 35 +++ src/ch/ch_monitor.c | 213 +++++--------- src/ch/ch_monitor.h | 7 +- src/ch/ch_process.c | 166 ++++++++++- src/ch/meson.build | 2 + src/conf/domain_conf.c | 1 - src/conf/domain_conf.h | 3 +- src/hypervisor/domain_interface.c | 457 ++++++++++++++++++++++++++++++ src/hypervisor/domain_interface.h | 45 +++ src/hypervisor/meson.build | 1 + src/libvirt_private.syms | 11 + src/libxl/libxl_domain.c | 2 +- src/libxl/libxl_driver.c | 4 +- src/lxc/lxc_driver.c | 2 +- src/lxc/lxc_process.c | 4 +- src/qemu/qemu_command.c | 8 +- src/qemu/qemu_hotplug.c | 15 +- src/qemu/qemu_interface.c | 339 +--------------------- src/qemu/qemu_interface.h | 11 - src/qemu/qemu_process.c | 72 +---- src/util/virsocket.c | 116 ++++++++ src/util/virsocket.h | 3 + 29 files changed, 1107 insertions(+), 571 deletions(-) create mode 100644 src/ch/ch_interface.c create mode 100644 src/ch/ch_interface.h create mode 100644 src/hypervisor/domain_interface.c create mode 100644 src/hypervisor/domain_interface.h
Now, this is almost correct. I only have couple of suggestions. No need to resend another version. I've uploaded these patches among with my suggestions to my gitlab:
https://gitlab.com/MichalPrivoznik/libvirt/-/tree/ch_networking?ref_type=hea...
You'll find 'fixup' commits there - these contains fixes to those small issues I've raised in my review. I'll squash them in before merging.
Then, there's one suggestion - "Possible move of virSocketRecv() into ch_process.c" - honestly, I don't feel like virSocketRecv() belongs into src/util/ but I can't explain why really. For the time being we can have it in src/ch/. I'd split this commit and squash its parts into respective commits.
I don't feel strongly about keep virSocketRecv() in src/util. The method seemed generic enough to keep it in src/util. If you are not comfortable keeping it there, I am fine with moving it to src/ch.
Please do check if code still works even with my fixups and whether you agree with suggested changes. If so, I can give my reviewed by and merge
these.
Thanks for the detailed review on this patchset. I tested the patchset in above branch and I noticed 2 bugs. One introduced by me in v2, and one introduced by a fixup commit.
First bug is fixed by https://github.com/praveen-pk/libvirt/commit/6c3b068957d854c0b4f51925d3e87a8.... Seems like I missed to test a domain XML without an IP address to guest interface. This patch fixes the bug.
Second bug is fixed by https://github.com/praveen-pk/libvirt/commit/a7f7bcefbcc97f755b5075e52233288.... As `control` is now allocated from heap, the size is incorrectly set in `msg.msg_controllen`. This patch fixes it.
Oops, yes. I've changed commits and pushed. Reviewed-by: Michal Privoznik <mprivozn@redhat.com> Michal
participants (4)
-
Michal Prívozník
-
Praveen K Paladugu
-
Praveen K Paladugu
-
Praveen Paladugu