On 11/19/2012 11:51 AM, Michal Privoznik wrote:
Network should be notified if we plug in or unplug an
interface, so it can perform some action, e.g. set/unset
network part of QoS.
---
src/Makefile.am | 7 ++-
src/conf/domain_conf.h | 1 +
src/conf/network_conf.c | 1 +
src/conf/network_conf.h | 3 +
src/libvirt_network.syms | 8 ++
src/network/bridge_driver.c | 165 +++++++++++++++++++++++++++++++++++++++++++
src/network/bridge_driver.h | 10 ++-
src/qemu/qemu_command.c | 32 ++++++---
src/qemu/qemu_process.c | 12 +++
9 files changed, 225 insertions(+), 14 deletions(-)
create mode 100644 src/libvirt_network.syms
diff --git a/src/Makefile.am b/src/Makefile.am
index 1f32263..9b14ef8 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -1366,6 +1366,10 @@ if WITH_ATOMIC_OPS_PTHREAD
USED_SYM_FILES += libvirt_atomic.syms
endif
+if WITH_NETWORK
+USED_SYM_FILES += libvirt_network.syms
+endif
+
EXTRA_DIST += \
libvirt_public.syms \
libvirt_private.syms \
@@ -1379,7 +1383,8 @@ EXTRA_DIST += \
libvirt_sasl.syms \
libvirt_vmx.syms \
libvirt_xenxs.syms \
- libvirt_libssh2.syms
+ libvirt_libssh2.syms \
+ libvirt_network.syms
GENERATED_SYM_FILES = libvirt.syms libvirt.def libvirt_qemu.def
diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h
index 091879e..09c705a 100644
--- a/src/conf/domain_conf.h
+++ b/src/conf/domain_conf.h
@@ -862,6 +862,7 @@ struct _virDomainNetDef {
virNetDevBandwidthPtr bandwidth;
virNetDevVlan vlan;
int linkstate;
+ unsigned int class_id; /* Class ID for 'floor' */
};
/* Used for prefix of ifname of any network name generated dynamically
diff --git a/src/conf/network_conf.c b/src/conf/network_conf.c
index 41831e0..80189e6 100644
--- a/src/conf/network_conf.c
+++ b/src/conf/network_conf.c
@@ -318,6 +318,7 @@ virNetworkAssignDef(virNetworkObjListPtr nets,
}
virNetworkObjLock(network);
network->def = def;
+ network->class_id = 3; /* next free class id */
I don't really like the "magic number" characteristic of the 3 here. Can
we give this a #define or something?
nets->objs[nets->count] = network;
nets->count++;
diff --git a/src/conf/network_conf.h b/src/conf/network_conf.h
index 3e46304..8a8d1e4 100644
--- a/src/conf/network_conf.h
+++ b/src/conf/network_conf.h
@@ -221,6 +221,9 @@ struct _virNetworkObj {
virNetworkDefPtr def; /* The current definition */
virNetworkDefPtr newDef; /* New definition to activate at shutdown */
+
+ unsigned int class_id; /* next class ID for QoS */
Okay, so this is just a monotonically increasing counter so that each
interface gets a new and different class_id. Maybe you should call it
"nextFreeClassID" or something like that, so that everyone understands
it's not a class id used by the network. Or you might want to do this
with a bitmap so you can re-use id's of interfaces that are
disconnected. (can virBitmaps being dynamically expanded?)
+ unsigned long long floor_sum; /* sum of all 'floor'-s of
attached NICs */
And you keep track of the sum of all floors here.
So, what happens if libvirtd is restarted? It looks like they both go
back to their initial values.
And what about hotplug?
This is a similar problem to the pool of interfaces used by
macvtap/hostdev modes - a network with an interface pool needs to have
the "inuse" counters of each interface refreshed whenever libvirtd
restarts. So the necessary hooks are already in place:
networkAllocateActualDevice - called any time an interface with
type='network' is initially
allocated and connected.
networkNotifyActualDevice - called for each type='network' interface
of each active domain
any time libvirtd restarts
networkReleaseActualDevice - called for every type='network'
interface as it is being
disconnected and destroyed.
These are called for *all* type='network' interfaces, not just those
that need to allocate a physical netdev from a pool.
Rather than adding your own new hooks (and figuring out all the places
they need to be called), I think it would be better to use the existing
hooks (perhaps calling a reduced/renamed version of your functions,
which can possibly be moved over to ). That will have two advantages:
1) It will assure that the bandwidth functions are called at all the
right times, including hotplug/unplug, and libvirtd restart
2) It will continue the process of consolidating all network-related
functionality need for these three events into 3 functions which may
some day be moved into their own daemom with a public (within libvirt
anyway) API accessible via RPC (thus allowing non-privileged libvirts to
have full networking functionality).
Note that you will probably want to save the interface class_id in the
iface->actual (as a matter of fact, in
iface->data.network.actual->bandwidth, which can be retrieved with
virDomainNetGetActualBandwidth()) so that it's saved properly in the
interface's state without polluting the interface's config. Then it will
be read back from the state file during the restart and available during
networkNotifyActualDevice() of each interface. I guess you can then
re-set the network->class_id by checking interface->class_id and setting
network->class_id = MAX(network->class_id, iface->class_id + 1);
for each encountered interface (or add it to a bitmap, if a) bitmaps can
be enlarged dynamically or b) we can determine a reasonable maximum
limit on number of active domains). At the same time, you'll recompute
the network->floor_sum iteratively as the network is called with a
notify for each interface.
};
typedef struct _virNetworkObjList virNetworkObjList;
diff --git a/src/libvirt_network.syms b/src/libvirt_network.syms
new file mode 100644
index 0000000..14f76ec
--- /dev/null
+++ b/src/libvirt_network.syms
@@ -0,0 +1,8 @@
+#
+# Network-specific symbols
+#
+
+# bridge_driver.h
+virNetDevBandwidthPlug;
+virNetDevBandwidthUnplug;
+virNetDevBandwidthUpdateRate;
diff --git a/src/network/bridge_driver.c b/src/network/bridge_driver.c
index 256b601..44fa56e 100644
--- a/src/network/bridge_driver.c
+++ b/src/network/bridge_driver.c
@@ -4192,3 +4192,168 @@ cleanup:
error:
goto cleanup;
}
+
+/**
+ * networkCheckBandwidth:
+ * @net: network QoS
+ * @iface: interface QoS
+ * @new_rate: new rate for non guaranteed class
+ *
+ * Returns: -1 if plugging would overcommit network QoS
+ * 0 if plugging is safe (@new_rate updated)
+ * 1 if no QoS is set (@new_rate untouched)
+ */
+static int
+networkCheckBandwidth(virNetworkObjPtr net,
+ virDomainNetDefPtr iface,
+ unsigned long long *new_rate)
+{
+ int ret = -1;
+ virNetDevBandwidthPtr netBand = net->def->bandwidth;
+ virNetDevBandwidthPtr ifaceBand = iface->bandwidth;
+ unsigned long long tmp_floor_sum = net->floor_sum;
+ unsigned long long tmp_new_rate = 0;
+
+ if (!ifaceBand || !ifaceBand->in || !ifaceBand->in->floor ||
+ !netBand || !netBand->in)
+ return 1;
+
+ tmp_new_rate = netBand->in->average;
+ tmp_floor_sum += ifaceBand->in->floor;
+
+ /* check against peak */
+ if (netBand->in->peak) {
+ tmp_new_rate = netBand->in->peak;
+ if (tmp_floor_sum > netBand->in->peak) {
+ virReportError(VIR_ERR_OPERATION_INVALID,
+ _("Cannot plug '%s' interface into '%s'
because it "
+ "would overcommit 'peak' on network
'%s'"),
+ iface->ifname,
+ net->def->bridge,
+ net->def->name);
+ goto cleanup;
+ }
+ } else if (tmp_floor_sum > netBand->in->average) {
+ /* tmp_floor_sum can be between 'average' and 'peak' iff
'peak' is set.
+ * Otherwise, tmp_floor_sum must be below 'average'. */
+ virReportError(VIR_ERR_OPERATION_INVALID,
+ _("Cannot plug '%s' interface into '%s'
because it "
+ "would overcommit 'average' on network
'%s'"),
+ iface->ifname,
+ net->def->bridge,
+ net->def->name);
+ goto cleanup;
+ }
+
+ *new_rate = tmp_new_rate;
+ ret = 0;
+
+cleanup:
+ return ret;
+}
+
+int
+networkNotifyPlug(virNetworkPtr network,
+ virDomainNetDefPtr iface)
+{
+ struct network_driver *driver = network->conn->networkPrivateData;
+ virNetworkObjPtr net = NULL;
+ int ret = -1;
+ int plug_ret;
+ unsigned long long new_rate = 0;
+
+ networkDriverLock(driver);
+ net = virNetworkFindByUUID(&driver->networks, network->uuid);
+ networkDriverUnlock(driver);
+
+ if ((plug_ret = networkCheckBandwidth(net, iface, &new_rate)) < 0) {
+ /* helper reported error */
+ goto cleanup;
+ }
+
+ if (plug_ret > 0) {
+ /* no QoS needs to be set; claim success */
+ ret = 0;
+ goto cleanup;
+ }
+
+ plug_ret = virNetDevBandwidthPlug(net->def->bridge,
+ net->def->bandwidth,
+ iface->ifname,
+ &iface->mac,
+ iface->bandwidth,
+ net->class_id);
+ if (plug_ret < 0) {
+ ignore_value(virNetDevBandwidthUnplug(net->def->bridge,
+ net->class_id));
+ goto cleanup;
+ }
+
+ if (plug_ret > 0) {
+ /* Well, this is embarrassing. The networkCheckBandwidth helper
+ * says we need to set a QoS, but virNetDevBandwidthPlug says
+ * we don't need to. It's almost certainly a bug then. */
+ VIR_WARN("Something strange happened. You may want to upgrade
libvirt");
+ ret = 0;
+ goto cleanup;
+ }
+
+ /* QoS was set, generate new class ID */
+ iface->class_id = net->class_id;
+ net->class_id++;
+ /* update sum of 'floor'-s of attached NICs */
+ net->floor_sum += iface->bandwidth->in->floor;
+ /* update rate for non guaranteed NICs */
+ new_rate -= net->floor_sum;
+ if (virNetDevBandwidthUpdateRate(net->def->bridge, "1:2",
+ net->def->bandwidth, new_rate) < 0)
+ VIR_WARN("Unable to update rate for 1:2 class on %s bridge",
+ net->def->bridge);
+
+ ret = 0;
+
+cleanup:
+ if (net)
+ virNetworkObjUnlock(net);
+ return ret;
+}
+
+int
+networkNotifyUnplug(virDomainNetDefPtr iface)
+{
+ struct network_driver *driver = driverState;
+ virNetworkObjPtr net = NULL;
+ int ret = 0;
+ unsigned long long new_rate;
+
+ networkDriverLock(driver);
+ net = virNetworkFindByName(&driver->networks, iface->data.network.name);
+ networkDriverUnlock(driver);
+
+ if (iface->class_id) {
+ /* we must remove class from bridge */
+ new_rate = net->def->bandwidth->in->average;
+
+ if (net->def->bandwidth->in->peak > 0)
+ new_rate = net->def->bandwidth->in->peak;
+
+ ret = virNetDevBandwidthUnplug(net->def->bridge, iface->class_id);
+ if (ret < 0)
+ goto cleanup;
+ /* update sum of 'floor'-s of attached NICs */
+ net->floor_sum -= iface->bandwidth->in->floor;
+ /* update rate for non guaranteed NICs */
+ new_rate -= net->floor_sum;
+ if (virNetDevBandwidthUpdateRate(net->def->bridge, "1:2",
+ net->def->bandwidth, new_rate) < 0)
+ VIR_WARN("Unable to update rate for 1:2 class on %s bridge",
+ net->def->bridge);
+ /* no class is associated any longer */
+ iface->class_id = 0;
+ }
+
+cleanup:
+ if (net)
+ virNetworkObjUnlock(net);
+ return ret;
+}
diff --git a/src/network/bridge_driver.h b/src/network/bridge_driver.h
index 0fae275..2f4b2e4 100644
--- a/src/network/bridge_driver.h
+++ b/src/network/bridge_driver.h
@@ -48,8 +48,12 @@ int networkGetNetworkAddress(const char *netname, char **netaddr)
int networkBuildDhcpDaemonCommandLine(virNetworkObjPtr network,
virCommandPtr *cmdout, char *pidfile,
- dnsmasqContext *dctx)
- ;
+ dnsmasqContext *dctx);
+int networkNotifyPlug(virNetworkPtr network,
+ virDomainNetDefPtr iface)
+ ATTRIBUTE_NONNULL(1) ATTRIBUTE_NONNULL(2);
+int networkNotifyUnplug(virDomainNetDefPtr iface)
+ ATTRIBUTE_NONNULL(1);
# else
/* Define no-op replacements that don't drag in any link dependencies. */
# define networkAllocateActualDevice(iface) 0
@@ -57,6 +61,8 @@ int networkBuildDhcpDaemonCommandLine(virNetworkObjPtr network,
# define networkReleaseActualDevice(iface) 0
# define networkGetNetworkAddress(netname, netaddr) (-2)
# define networkBuildDhcpDaemonCommandLine(network, cmdout, pidfile, dctx) 0
+# define networkNotifyPlug(network, iface) 0
+# define networkNotifyUnplug(network, iface) 0
# endif
typedef char *(*networkDnsmasqLeaseFileNameFunc)(const char *netname);
diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c
index c1f8680..efe98b3 100644
--- a/src/qemu/qemu_command.c
+++ b/src/qemu/qemu_command.c
@@ -207,12 +207,13 @@ qemuNetworkIfaceConnect(virDomainDefPtr def,
unsigned int tap_create_flags = VIR_NETDEV_TAP_CREATE_IFUP;
bool template_ifname = false;
int actualType = virDomainNetGetActualType(net);
+ virNetworkPtr network = NULL;
+ virErrorPtr errobj = NULL;
if (actualType == VIR_DOMAIN_NET_TYPE_NETWORK) {
int active, fail = 0;
- virErrorPtr errobj;
- virNetworkPtr network = virNetworkLookupByName(conn,
- net->data.network.name);
+ network = virNetworkLookupByName(conn,
+ net->data.network.name);
if (!network)
return -1;
@@ -232,14 +233,14 @@ qemuNetworkIfaceConnect(virDomainDefPtr def,
fail = 1;
}
- /* Make sure any above failure is preserved */
- errobj = virSaveLastError();
- virNetworkFree(network);
- virSetError(errobj);
- virFreeError(errobj);
-
- if (fail)
+ if (fail) {
+ /* Make sure any above failure is preserved */
+ errobj = virSaveLastError();
+ virNetworkFree(network);
+ virSetError(errobj);
+ virFreeError(errobj);
return -1;
+ }
} else if (actualType == VIR_DOMAIN_NET_TYPE_BRIDGE) {
if (!(brname = strdup(virDomainNetGetActualBridgeName(net)))) {
@@ -301,6 +302,12 @@ qemuNetworkIfaceConnect(virDomainDefPtr def,
goto cleanup;
}
+ if (tapfd >= 0 && network &&
+ networkNotifyPlug(network, net) < 0) {
+ VIR_FORCE_CLOSE(tapfd);
+ goto cleanup;
+ }
+
if (tapfd >= 0) {
if ((net->filter) && (net->ifname)) {
if (virDomainConfNWFilterInstantiate(conn, def->uuid, net) < 0)
@@ -310,7 +317,10 @@ qemuNetworkIfaceConnect(virDomainDefPtr def,
cleanup:
VIR_FREE(brname);
-
+ errobj = virSaveLastError();
+ virNetworkFree(network);
+ virSetError(errobj);
+ virFreeError(errobj);
return tapfd;
}
diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c
index 29b7ae1..6ff065c 100644
--- a/src/qemu/qemu_process.c
+++ b/src/qemu/qemu_process.c
@@ -4047,6 +4047,18 @@ void qemuProcessStop(struct qemud_driver *driver,
}
}
+ for (i = 0; i < vm->def->nnets; i++) {
+ virDomainNetDefPtr net = vm->def->nets[i];
+
+ if (virDomainNetGetActualType(net) != VIR_DOMAIN_NET_TYPE_NETWORK)
+ continue;
+
+ if (networkNotifyUnplug(net) < 0) {
+ VIR_WARN("Unable to remove QoS settings for interface
'%s'",
+ net->ifname);
+ }
+ }
+
if (priv->agent) {
qemuAgentClose(priv->agent);
priv->agent = NULL;
To summarize: I think this needs to be refactored to be integrated into
the network*ActualDevice() functions so that the bookkeeping is properly
handled in all cases, including hotplug and libvirtd restart.