February 2012 - Devel - Libvirt List Archives

[libvirt] [PATCH] vmware: detect when a domain was shut down from the inside

by Jean-Baptiste Rouault

This patch adds an internal function vmwareGetVMStatus to get the real state of the domain. This function is used in various places in the driver, in particular to detect when the domain has been shut down by the user with the "halt" command. --- src/vmware/vmware_driver.c | 83 +++++++++++++++++++++++++++++++++++++------- 1 files changed, 70 insertions(+), 13 deletions(-) diff --git a/src/vmware/vmware_driver.c b/src/vmware/vmware_driver.c index 56e9d2d..6f75f86 100644 --- a/src/vmware/vmware_driver.c +++ b/src/vmware/vmware_driver.c @@ -28,6 +28,7 @@ #include "datatypes.h" #include "virfile.h" #include "memory.h" +#include "util.h" #include "uuid.h" #include "command.h" #include "vmx.h" @@ -181,6 +182,50 @@ vmwareGetVersion(virConnectPtr conn, unsigned long *version) } static int +vmwareGetVMStatus(struct vmware_driver *driver, + virDomainObjPtr vm, + int *status, + int *reason) +{ + virCommandPtr cmd; + char *outbuf; + char *vmxAbsolutePath; + int state; + int ret = -1; + + cmd = virCommandNewArgList(VMRUN, "-T", vmw_types[driver->type], + "list", NULL); + virCommandSetOutputBuffer(cmd, &outbuf); + if (virCommandRun(cmd, NULL) < 0) + goto cleanup; + + state = virDomainObjGetState(vm, reason); + + if (virFileResolveAllLinks(((vmwareDomainPtr) vm->privateData)->vmxPath, + &vmxAbsolutePath) == -1) + goto cleanup; + + if (strstr(outbuf, vmxAbsolutePath)) { + /* If the vmx path is in the output, the domain is running or + * is paused but we have no way to detect if it is paused or not. 
*/ + if (state == VIR_DOMAIN_PAUSED) + *status = state; + else + *status = VIR_DOMAIN_RUNNING; + } else { + *status = VIR_DOMAIN_SHUTOFF; + } + + ret = 0; + +cleanup: + virCommandFree(cmd); + VIR_FREE(outbuf); + VIR_FREE(vmxAbsolutePath); + return ret; +} + +static int vmwareStopVM(struct vmware_driver *driver, virDomainObjPtr vm, virDomainShutoffReason reason) @@ -212,12 +257,6 @@ vmwareStartVM(struct vmware_driver *driver, virDomainObjPtr vm) }; const char *vmxPath = ((vmwareDomainPtr) vm->privateData)->vmxPath; - if (virDomainObjGetState(vm, NULL) != VIR_DOMAIN_SHUTOFF) { - vmwareError(VIR_ERR_OPERATION_INVALID, "%s", - _("domain is not in shutoff state")); - return -1; - } - vmwareSetSentinal(cmd, vmw_types[driver->type]); vmwareSetSentinal(cmd, vmxPath); if (!((vmwareDomainPtr) vm->privateData)->gui) @@ -317,6 +356,7 @@ vmwareDomainShutdownFlags(virDomainPtr dom, { struct vmware_driver *driver = dom->conn->privateData; virDomainObjPtr vm; + int status; int ret = -1; virCheckFlags(0, -1); @@ -331,7 +371,10 @@ vmwareDomainShutdownFlags(virDomainPtr dom, goto cleanup; } - if (virDomainObjGetState(vm, NULL) != VIR_DOMAIN_RUNNING) { + if (vmwareGetVMStatus(driver, vm, &status, NULL) == -1) + goto cleanup; + + if (status != VIR_DOMAIN_RUNNING) { vmwareError(VIR_ERR_INTERNAL_ERROR, "%s", _("domain is not in running state")); goto cleanup; @@ -467,6 +510,7 @@ vmwareDomainReboot(virDomainPtr dom, unsigned int flags) VMRUN, "-T", PROGRAM_SENTINAL, "reset", PROGRAM_SENTINAL, "soft", NULL }; + int status; int ret = -1; virCheckFlags(0, -1); @@ -485,8 +529,10 @@ vmwareDomainReboot(virDomainPtr dom, unsigned int flags) vmwareSetSentinal(cmd, vmw_types[driver->type]); vmwareSetSentinal(cmd, vmxPath); + if (vmwareGetVMStatus(driver, vm, &status, NULL) == -1) + goto cleanup; - if (virDomainObjGetState(vm, NULL) != VIR_DOMAIN_RUNNING) { + if (status != VIR_DOMAIN_RUNNING) { vmwareError(VIR_ERR_INTERNAL_ERROR, "%s", _("domain is not in running state")); goto cleanup; @@ -582,6 
+628,7 @@ vmwareDomainCreateWithFlags(virDomainPtr dom, { struct vmware_driver *driver = dom->conn->privateData; virDomainObjPtr vm; + int status; int ret = -1; virCheckFlags(0, -1); @@ -596,7 +643,10 @@ vmwareDomainCreateWithFlags(virDomainPtr dom, goto cleanup; } - if (virDomainObjIsActive(vm)) { + if (vmwareGetVMStatus(driver, vm, &status, NULL) == -1) + goto cleanup; + + if (status != VIR_DOMAIN_SHUTOFF) { vmwareError(VIR_ERR_OPERATION_INVALID, "%s", _("Domain is already running")); goto cleanup; @@ -623,6 +673,7 @@ vmwareDomainUndefineFlags(virDomainPtr dom, { struct vmware_driver *driver = dom->conn->privateData; virDomainObjPtr vm; + int status; int ret = -1; virCheckFlags(0, -1); @@ -645,7 +696,10 @@ vmwareDomainUndefineFlags(virDomainPtr dom, goto cleanup; } - if (virDomainObjIsActive(vm)) { + if (vmwareGetVMStatus(driver, vm, &status, NULL) == -1) + goto cleanup; + + if (status == VIR_DOMAIN_RUNNING) { vm->persistent = 0; } else { virDomainRemoveInactive(&driver->domains, vm); @@ -902,6 +956,7 @@ vmwareDomainGetInfo(virDomainPtr dom, virDomainInfoPtr info) { struct vmware_driver *driver = dom->conn->privateData; virDomainObjPtr vm; + int state; int ret = -1; vmwareDriverLock(driver); @@ -914,7 +969,10 @@ vmwareDomainGetInfo(virDomainPtr dom, virDomainInfoPtr info) goto cleanup; } - info->state = virDomainObjGetState(vm, NULL); + if (vmwareGetVMStatus(driver, vm, &state, NULL) == -1) + goto cleanup; + + info->state = state; info->cpuTime = 0; info->maxMem = vm->def->mem.max_balloon; info->memory = vm->def->mem.cur_balloon; @@ -949,8 +1007,7 @@ vmwareDomainGetState(virDomainPtr dom, goto cleanup; } - *state = virDomainObjGetState(vm, reason); - ret = 0; + ret = vmwareGetVMStatus(driver, vm, state, reason); cleanup: if (vm) -- 1.7.8.3

13 years, 4 months

3
3
0 / 0

[libvirt] Per-guest configurable user/group for QEMU processes

by Marcelo Cerri

Hi, I'm starting to work on an improvement for libvirt to be able to support per-guest configurable user and group IDs for QEMU processes. Currently, libvirt uses a configurable pair of user and group, which is defined in qemu.conf, for all qemu processes when running in privileged mode. This topic was already discussed on the qemu mailing list (http://lists.nongnu.org/archive/html/qemu-devel/2011-10/msg00758.html) but, as this requires changes to the libvirt API, I'd like to discuss what would be the best solution for it. A solution (as proposed in the link above) would be to extend the security driver model to allow multiple drivers. In this case, an example of the XML definition would be: ... <seclabel type='dynamic' model='selinux'> <label>system_u:system_r:svirt_t:s0:c633,c712</label> <imagelabel>system_u:object_r:svirt_image_t:s0:c633,c712</imagelabel> </seclabel> <seclabel type='dynamic' model='dac'> <label>102:102</label> <imagelabel>102:102</imagelabel> </seclabel> ... I don't know if this is a clean solution because the usual option would be to enclose the block above in a "<seclabels>" tag. But as this would break the existing API, it's not viable. Another option is to expose the stack security driver that already exists internally in libvirt (maybe extending it to support more than two security drivers): ... <seclabel type='stack'> <seclabel type='dynamic' model='selinux'> <label>system_u:system_r:svirt_t:s0:c633,c712</label> <imagelabel>system_u:object_r:svirt_image_t:s0:c633,c712</imagelabel> </seclabel> <seclabel type='dynamic' model='dac'> <label>102:102</label> <imagelabel>102:102</imagelabel> </seclabel> </seclabel> ... In that case, a nested seclabel would only be allowed when type='stack'. Independently of how multiple security drivers can be expressed in the XML, another problem would be how functions such as virDomainGetSecurityLabel should behave. A third option is to just not support multiple security drivers and include a new tag for DAC: ... 
<seclabel type='dynamic' model='selinux'> <label>system_u:system_r:svirt_t:s0:c633,c712</label> <imagelabel>system_u:object_r:svirt_image_t:s0:c633,c712</imagelabel> </seclabel> <dac process='102:102' image='102:102'/> ... Please let me know your opinions about this topic. Regards, Marcelo

13 years, 4 months

3
6
0 / 0

[libvirt] [PATCH 0/5 0/1 0/1 V3] Add new public API virDomainGetPcpusUsage() and pcpuinfo command in virsh

by Lai Jiangshan

"virt-top -1" can call virDomainGetPcpusUsage() periodically and get the activity of each physical CPU. (See the last patch in this series). virsh also gains a pcpuinfo command which calls virDomainGetPcpusUsage() to get information about the physical CPUs, such as the CPU usage and the currently attached vCPUs. # virsh pcpuinfo rhel6 CPU: 0 Curr VCPU: - Usage: 47.3 CPU: 1 Curr VCPU: 1 Usage: 46.8 CPU: 2 Curr VCPU: 0 Usage: 52.7 CPU: 3 Curr VCPU: - Usage: 44.1 Changed from V2: Simple cleanup Add python implementation of virDomainGetPcpusUsage() Acked-by: "Richard W.M. Jones" <rjones(a)redhat.com> Signed-off-by: Lai Jiangshan <laijs(a)cn.fujitsu.com> Patch for libvirt(5 patches): daemon/remote.c | 68 ++++++++++++++++++++++++++++ include/libvirt/libvirt.h.in | 5 ++ python/generator.py | 1 + python/libvirt-override-api.xml | 6 +++ python/libvirt-override.c | 33 ++++++++++++++ src/driver.h | 7 +++ src/libvirt.c | 51 +++++++++++++++++++++ src/libvirt_public.syms | 5 ++ src/qemu/qemu.conf | 5 +- src/qemu/qemu_conf.c | 3 +- src/qemu/qemu_driver.c | 74 +++++++++++++++++++++++++++++++ src/remote/remote_driver.c | 51 +++++++++++++++++++++ src/remote/remote_protocol.x | 17 +++++++- src/remote_protocol-structs | 13 +++++ src/util/cgroup.c | 7 +++ src/util/cgroup.h | 1 + tools/virsh.c | 93 +++++++++++++++++++++++++++++++++++++++ tools/virsh.pod | 5 ++ 18 files changed, 441 insertions(+), 4 deletions(-) Patch for ocaml-libvirt (1 patch): libvirt/libvirt.ml | 1 + libvirt/libvirt.mli | 4 ++++ libvirt/libvirt_c_oneoffs.c | 25 +++++++++++++++++++++++++ 3 files changed, 30 insertions(+), 0 deletions(-) Patch for virt-top (1 patch): virt-top/virt_top.ml | 75 +++++++++++++++++-------------------------------- 1 files changed, 26 insertions(+), 49 deletions(-) -- 1.7.4.4

13 years, 4 months

7
60
0 / 0

[libvirt] [PATCH RFC]: Support numad

by Osier Yang

numad is a user-level daemon that monitors NUMA topology and processes resource consumption to facilitate good NUMA resource alignment of applications/virtual machines to improve performance and minimize cost of remote memory latencies. It provides a pre-placement advisory interface, so significant processes can be pre-bound to nodes with sufficient available resources. More details: http://fedoraproject.org/wiki/Features/numad "numad -w ncpus:memory_amount" is the advisory interface numad provides currently. This patch adds support for it by introducing new XML like: <numatune> <cpu required_cpus="4" required_memory="524288"/> </numatune> And the corresponding numad command line will be: numad -w 4:512 The advisory nodeset returned from numad will be used to set domain process CPU affinity then. (e.g. qemuProcessInitCpuAffinity). If the user specifies both CPU affinity policy (e.g. <vcpu cpuset="1-10,^7,^8">4</vcpu>) and XML indicating to use numad for the advisory nodeset, the specified CPU affinity will be overridden by the nodeset returned from numad. If no XML specifies the CPU affinity policy, and XML indicating to use numad is specified, the returned nodeset will be printed in <vcpu cpuset="$nodeset_from_numad">4</vcpu>. Only QEMU/KVM and LXC drivers support it now. --- configure.ac | 8 +++ docs/formatdomain.html.in | 18 ++++++- docs/schemas/domaincommon.rng | 12 ++++ src/conf/domain_conf.c | 125 +++++++++++++++++++++++++++++++---------- src/conf/domain_conf.h | 5 ++ src/lxc/lxc_controller.c | 98 ++++++++++++++++++++++++++++---- src/qemu/qemu_process.c | 99 +++++++++++++++++++++++++++++---- 7 files changed, 311 insertions(+), 54 deletions(-) diff --git a/configure.ac b/configure.ac index c9cdd7b..31f0835 100644 --- a/configure.ac +++ b/configure.ac @@ -1445,6 +1445,14 @@ AM_CONDITIONAL([HAVE_NUMACTL], [test "$with_numactl" != "no"]) AC_SUBST([NUMACTL_CFLAGS]) AC_SUBST([NUMACTL_LIBS]) +dnl Do we have numad? 
+if test "$with_qemu" = "yes"; then + AC_PATH_PROG([NUMAD], [numad], [], [/bin:/usr/bin:/usr/local/bin:$PATH]) + + if test -n "$NUMAD"; then + AC_DEFINE_UNQUOTED([NUMAD],["$NUMAD"], [Location or name of the numad program]) + fi +fi dnl pcap lib LIBPCAP_CONFIG="pcap-config" diff --git a/docs/formatdomain.html.in b/docs/formatdomain.html.in index 6fcca94..d8e70a6 100644 --- a/docs/formatdomain.html.in +++ b/docs/formatdomain.html.in @@ -505,6 +505,7 @@ ... <numatune> <memory mode="strict" nodeset="1-4,^3"/> + <cpu required_cpus="3" required_memory="524288"/> </numatune> ... </domain> @@ -519,7 +520,7 @@ Since 0.9.3 <dt><code>memory</code></dt> <dd> - The optional <code>memory</code> element specify how to allocate memory + The optional <code>memory</code> element specifies how to allocate memory for the domain process on a NUMA host. It contains two attributes, attribute <code>mode</code> is either 'interleave', 'strict', or 'preferred', @@ -527,6 +528,21 @@ syntax with attribute <code>cpuset</code> of element <code>vcpu</code>. Since 0.9.3 </dd> + <dd> + The optional <code>cpu</code> element indicates pinning the virtual CPUs + to the nodeset returned by querying "numad" (a system daemon that monitors + NUMA topology and usage). It has two attributes, attribute + <code>required_cpus</code> specifies the number of physical CPUs the guest + process want to use. And the optional attribute <code>required_memory</code> + specifies the amount of free memory the guest process want to see on a node, + "numad" will pick the physical CPUs on the node which has enough free + memory of amount specified by <code>required_memory</code>. + + NB, with using this element, the physical CPUs specified by attribute + <code>cpuset</code> (of element <code>vcpu</code>) will be overridden by the + nodeset returned from "numad". 
+ Since 0.9.11 (QEMU/KVM and LXC only) + </dd> </dl> diff --git a/docs/schemas/domaincommon.rng b/docs/schemas/domaincommon.rng index 3908733..d0f443d 100644 --- a/docs/schemas/domaincommon.rng +++ b/docs/schemas/domaincommon.rng @@ -549,6 +549,18 @@ </attribute> </element> </optional> + <optional> + <element name="cpu"> + <attribute name="required_cpu"> + <ref name="countCPU"/> + </attribute> + <optional> + <attribute name="required_memory"> + <ref name="memoryKB"/> + </attribute> + </optional> + </element> + </optional> </element> </optional> </interleave> diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c index f9654f1..aa03c05 100644 --- a/src/conf/domain_conf.c +++ b/src/conf/domain_conf.c @@ -7125,7 +7125,6 @@ error: goto cleanup; } - static int virDomainDefMaybeAddController(virDomainDefPtr def, int type, int idx) @@ -7185,6 +7184,7 @@ static virDomainDefPtr virDomainDefParseXML(virCapsPtr caps, bool uuid_generated = false; virBitmapPtr bootMap = NULL; unsigned long bootMapSize = 0; + xmlNodePtr cur; if (VIR_ALLOC(def) < 0) { virReportOOMError(); @@ -7454,47 +7454,100 @@ static virDomainDefPtr virDomainDefParseXML(virCapsPtr caps, VIR_FREE(nodes); /* Extract numatune if exists. 
*/ - if ((n = virXPathNodeSet("./numatune", ctxt, NULL)) < 0) { + if ((n = virXPathNodeSet("./numatune", ctxt, &nodes)) < 0) { virDomainReportError(VIR_ERR_INTERNAL_ERROR, "%s", _("cannot extract numatune nodes")); goto error; } + if (n > 1) { + virDomainReportError(VIR_ERR_XML_ERROR, "%s", + _("only one numatune is supported")); + VIR_FREE(nodes); + goto error; + } + if (n) { - tmp = virXPathString("string(./numatune/memory/@nodeset)", ctxt); - if (tmp) { - char *set = tmp; - int nodemasklen = VIR_DOMAIN_CPUMASK_LEN; + cur = nodes[0]->children; + while (cur != NULL) { + if (cur->type == XML_ELEMENT_NODE) { + if ((xmlStrEqual(cur->name, BAD_CAST "memory"))) { + tmp = virXMLPropString(cur, "nodeset"); - if (VIR_ALLOC_N(def->numatune.memory.nodemask, nodemasklen) < 0) { - goto no_memory; - } + if (tmp) { + char *set = tmp; + int nodemasklen = VIR_DOMAIN_CPUMASK_LEN; - /* "nodeset" leads same syntax with "cpuset". */ - if (virDomainCpuSetParse(set, 0, def->numatune.memory.nodemask, - nodemasklen) < 0) - goto error; - VIR_FREE(tmp); - } else { - virDomainReportError(VIR_ERR_INTERNAL_ERROR, - "%s", _("nodeset for NUMA memory tuning must be set")); - goto error; - } + if (VIR_ALLOC_N(def->numatune.memory.nodemask, + nodemasklen) < 0) { + virReportOOMError(); + goto error; + } - tmp = virXPathString("string(./numatune/memory/@mode)", ctxt); - if (tmp) { - if ((def->numatune.memory.mode = - virDomainNumatuneMemModeTypeFromString(tmp)) < 0) { - virDomainReportError(VIR_ERR_INTERNAL_ERROR, - _("Unsupported NUMA memory tuning mode '%s'"), - tmp); - goto error; + /* "nodeset" leads same syntax with "cpuset". 
*/ + if (virDomainCpuSetParse(set, 0, + def->numatune.memory.nodemask, + nodemasklen) < 0) + goto error; + VIR_FREE(tmp); + } else { + virDomainReportError(VIR_ERR_XML_ERROR, "%s", + _("nodeset for NUMA memory " + "tuning must be set")); + goto error; + } + + tmp = virXMLPropString(cur, "mode"); + if (tmp) { + if ((def->numatune.memory.mode = + virDomainNumatuneMemModeTypeFromString(tmp)) < 0) { + virDomainReportError(VIR_ERR_XML_ERROR, + _("Unsupported NUMA memory " + "tuning mode '%s'"), + tmp); + goto error; + } + VIR_FREE(tmp); + } else { + def->numatune.memory.mode = VIR_DOMAIN_NUMATUNE_MEM_STRICT; + } + } else if (xmlStrEqual(cur->name, BAD_CAST "cpu")) { + char *req_cpus = NULL; + char *req_memory = NULL; + req_cpus = virXMLPropString(cur, "required_cpus"); + req_memory = virXMLPropString(cur, "required_memory"); + + if (req_cpus && + virStrToLong_ui(req_cpus, NULL, 10, + &def->numatune.cpu.required_cpus) < 0) { + virDomainReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("Cannot parse <cpu> 'required_cpus'" + " attribute")); + goto error; + } + + if (req_memory && + virStrToLong_ul(req_memory, NULL, 10, + &def->numatune.cpu.required_memory) < 0) { + virDomainReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("Cannot parse <cpu> 'required_memory'" + " attribute")); + goto error; + } + + VIR_FREE(req_cpus); + VIR_FREE(req_memory); + } else { + virDomainReportError(VIR_ERR_XML_ERROR, + _("unsupported XML element %s"), + (const char *)cur->name); + goto error; + } } - VIR_FREE(tmp); - } else { - def->numatune.memory.mode = VIR_DOMAIN_NUMATUNE_MEM_STRICT; + cur = cur->next; } } + VIR_FREE(nodes); n = virXPathNodeSet("./features/*", ctxt, &nodes); if (n < 0) @@ -11761,7 +11814,8 @@ virDomainDefFormatInternal(virDomainDefPtr def, def->cputune.period || def->cputune.quota) virBufferAddLit(buf, " </cputune>\n"); - if (def->numatune.memory.nodemask) { + if (def->numatune.memory.nodemask || + def->numatune.cpu.required_cpus) { const char *mode; char *nodemask = NULL; @@ 
-11778,6 +11832,15 @@ virDomainDefFormatInternal(virDomainDefPtr def, virBufferAsprintf(buf, " <memory mode='%s' nodeset='%s'/>\n", mode, nodemask); VIR_FREE(nodemask); + + if (def->numatune.cpu.required_cpus) + virBufferAsprintf(buf, " <cpu required_cpus='%d' ", + def->numatune.cpu.required_cpus); + + if (def->numatune.cpu.required_memory) + virBufferAsprintf(buf, "required_memory='%lu'/>\n", + def->numatune.cpu.required_memory); + virBufferAddLit(buf, " </numatune>\n"); } diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h index 596be4d..1284599 100644 --- a/src/conf/domain_conf.h +++ b/src/conf/domain_conf.h @@ -1416,6 +1416,11 @@ struct _virDomainNumatuneDef { int mode; } memory; + struct { + unsigned int required_cpus; + unsigned long required_memory; + } cpu; + /* Future NUMA tuning related stuff should go here. */ }; diff --git a/src/lxc/lxc_controller.c b/src/lxc/lxc_controller.c index 8f336f5..ec6434d 100644 --- a/src/lxc/lxc_controller.c +++ b/src/lxc/lxc_controller.c @@ -327,6 +327,47 @@ static int lxcSetContainerNUMAPolicy(virDomainDefPtr def) } #endif +#if defined(NUMAD) +static char * +lxcGetNumadAdvice(unsigned int req_cpus, + unsigned long req_memory) { + virCommandPtr cmd = NULL; + char *reqs = NULL; + char *ret = NULL; + + /* numad uses "MB" for memory. 
*/ + if (req_memory) { + req_memory = req_memory / 1024; + if (virAsprintf(&reqs, "%d:%lu", req_cpus, req_memory) < 0) { + virReportOOMError(); + goto out; + } + cmd = virCommandNewArgList(NUMAD, "-w", reqs, NULL); + } else { + cmd = virCommandNewArgList(NUMAD, "-w", "%d", req_cpus, NULL); + } + + virCommandSetOutputBuffer(cmd, &ret); + + if (virCommandRun(cmd, NULL) < 0) { + lxcError(VIR_ERR_INTERNAL_ERROR, "%s", + _("Failed to query numad for the advisory nodeset")); + } + +out: + VIR_FREE(reqs); + virCommandFree(cmd); + return ret; +} +#else +static char * +lxcGetNumadAdvice(unsigned int req_cpus ATTRIBUTE_UNUSED, + unsigned long req_memory ATTRIBUTE_UNUSED) { + lxcError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", + _("numad is not available on this host")); + return NULL; +} +#endif /* * To be run while still single threaded @@ -355,19 +396,54 @@ static int lxcSetContainerCpuAffinity(virDomainDefPtr def) return -1; } - if (def->cpumask) { - /* XXX why don't we keep 'cpumask' in the libvirt cpumap - * format to start with ?!?! */ - for (i = 0 ; i < maxcpu && i < def->cpumasklen ; i++) - if (def->cpumask[i]) + /* def->cpumask will be overridden by the nodeset + * suggested by numad if it's specified. 
+ */ + if (def->numatune.cpu.required_cpus) { + char *tmp_cpumask = NULL; + char *nodeset = NULL; + + nodeset = lxcGetNumadAdvice(def->numatune.cpu.required_cpus, + def->numatune.cpu.required_memory); + if (!nodeset) + return -1; + + if (VIR_ALLOC_N(tmp_cpumask, VIR_DOMAIN_CPUMASK_LEN) < 0) { + virReportOOMError(); + return -1; + } + + if (virDomainCpuSetParse(nodeset, 0, tmp_cpumask, + VIR_DOMAIN_CPUMASK_LEN) < 0) { + VIR_FREE(tmp_cpumask); + VIR_FREE(nodeset); + return -1; + } + + for (i = 0; i < maxcpu && i < VIR_DOMAIN_CPUMASK_LEN; i++) { + if (tmp_cpumask[i]) VIR_USE_CPU(cpumap, i); + } + + /* Update def->cpumask */ + VIR_FREE(def->cpumask); + def->cpumask = tmp_cpumask; + VIR_FREE(nodeset); } else { - /* You may think this is redundant, but we can't assume libvirtd - * itself is running on all pCPUs, so we need to explicitly set - * the spawned LXC instance to all pCPUs if no map is given in - * its config file */ - for (i = 0 ; i < maxcpu ; i++) - VIR_USE_CPU(cpumap, i); + if (def->cpumask) { + /* XXX why don't we keep 'cpumask' in the libvirt cpumap + * format to start with ?!?! 
*/ + for (i = 0 ; i < maxcpu && i < def->cpumasklen ; i++) + if (def->cpumask[i]) + VIR_USE_CPU(cpumap, i); + } else { + /* You may think this is redundant, but we can't assume libvirtd + * itself is running on all pCPUs, so we need to explicitly set + * the spawned LXC instance to all pCPUs if no map is given in + * its config file */ + for (i = 0 ; i < maxcpu ; i++) + VIR_USE_CPU(cpumap, i); + } } /* We are pressuming we are running between fork/exec of LXC diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c index 41218de..eb9f8f1 100644 --- a/src/qemu/qemu_process.c +++ b/src/qemu/qemu_process.c @@ -1633,6 +1633,48 @@ qemuProcessInitNumaMemoryPolicy(virDomainObjPtr vm) } #endif +#if defined(NUMAD) +static char * +qemuGetNumadAdvice(unsigned int req_cpus, + unsigned long req_memory) { + virCommandPtr cmd = NULL; + char *reqs = NULL; + char *output = NULL; + + /* numad uses "MB" for memory. */ + if (req_memory) { + req_memory = req_memory / 1024; + if (virAsprintf(&reqs, "%d:%lu", req_cpus, req_memory) < 0) { + virReportOOMError(); + goto out; + } + + cmd = virCommandNewArgList(NUMAD, "-w", reqs, NULL); + } else { + cmd = virCommandNewArgList(NUMAD, "-w", "%u", req_cpus, NULL); + } + + virCommandSetOutputBuffer(cmd, &output); + + if (virCommandRun(cmd, NULL) < 0) + qemuReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("Failed to query numad for the advisory nodeset")); + +out: + VIR_FREE(reqs); + virCommandFree(cmd); + return output; +} +#else +static char * +qemuGetNumadAdvice(unsigned int req_cpus ATTRIBUTE_UNUSED, + unsigned long req_memory ATTRIBUTE_UNUSED) { + qemuReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", + _("numad is not available on this host")); + return NULL; +} +#endif + /* * To be run between fork/exec of QEMU only */ @@ -1661,19 +1703,54 @@ qemuProcessInitCpuAffinity(virDomainObjPtr vm) return -1; } - if (vm->def->cpumask) { - /* XXX why don't we keep 'cpumask' in the libvirt cpumap - * format to start with ?!?! 
*/ - for (i = 0 ; i < maxcpu && i < vm->def->cpumasklen ; i++) - if (vm->def->cpumask[i]) + /* vm->def->cpumask will be overridden by the nodeset + * suggested by numad if it's specified. + */ + if (vm->def->numatune.cpu.required_cpus) { + char *tmp_cpumask = NULL; + char *nodeset = NULL; + + nodeset = qemuGetNumadAdvice(vm->def->numatune.cpu.required_cpus, + vm->def->numatune.cpu.required_memory); + if (!nodeset) + return -1; + + if (VIR_ALLOC_N(tmp_cpumask, VIR_DOMAIN_CPUMASK_LEN) < 0) { + virReportOOMError(); + return -1; + } + + if (virDomainCpuSetParse(nodeset, 0, tmp_cpumask, + VIR_DOMAIN_CPUMASK_LEN) < 0) { + VIR_FREE(tmp_cpumask); + VIR_FREE(nodeset); + return -1; + } + + for (i = 0; i < maxcpu && i < VIR_DOMAIN_CPUMASK_LEN; i++) { + if (tmp_cpumask[i]) VIR_USE_CPU(cpumap, i); + } + + /* Update vm->def->cpumask */ + VIR_FREE(vm->def->cpumask); + vm->def->cpumask = tmp_cpumask; + VIR_FREE(nodeset); } else { - /* You may think this is redundant, but we can't assume libvirtd - * itself is running on all pCPUs, so we need to explicitly set - * the spawned QEMU instance to all pCPUs if no map is given in - * its config file */ - for (i = 0 ; i < maxcpu ; i++) - VIR_USE_CPU(cpumap, i); + if (vm->def->cpumask) { + /* XXX why don't we keep 'cpumask' in the libvirt cpumap + * format to start with ?!?! */ + for (i = 0 ; i < maxcpu && i < vm->def->cpumasklen ; i++) + if (vm->def->cpumask[i]) + VIR_USE_CPU(cpumap, i); + } else { + /* You may think this is redundant, but we can't assume libvirtd + * itself is running on all pCPUs, so we need to explicitly set + * the spawned QEMU instance to all pCPUs if no map is given in + * its config file */ + for (i = 0 ; i < maxcpu ; i++) + VIR_USE_CPU(cpumap, i); + } } /* We are pressuming we are running between fork/exec of QEMU -- 1.7.7.3

13 years, 4 months

5
8
0 / 0

[libvirt] [PATCHv2 00/17] Support for <interface type='hostdev'>

by Laine Stump

This series of patches enhances the <interface> device to support a sort of "intelligent hostdev", i.e. PCI passthrough where device-type specific initialization is done prior to assigning the device to the guest, in particular to allow setting the MAC address and do 802.1QbX setup for network devices. The first posting of this patch only supported parsing and formatting of these devices. This version also supports them in persistent config, as well as hotplug (both persistent and live-only). The only piece that isn't in this patchset (because it is coming from another author) is the code that actually Rather than adding all of the device-type specific config to <hostdev>, this is accomplished through adding a new type of <interface> element, type='hostdev'. When an interface is type='hostdev' the following is changed: * in the toplevel device, the managed attribute can be specified (with identical results as when it's specified in a <hostdev>) * The <source> element can specify a pci address or usb address, just as can be done in <hostdev>. One notable difference is that the type of the address is specified directly in the source <address> element, rather than as an attribute of the toplevel device (that's how it's done for <hostdev>, but for <interface>, the toplevel element's type attribute is already used). NB: a type=hostdev interface will reside in both the interface list (for configuration and memory management) and the hostdev list (for PCI attach/detach, and tracking of which devices are assigned). This entire series is available on gitorious: git://gitorious.org/~laine/libvirt/laine-staging.git in the "passthrough8" branch. Patches 1-7, 9-12, and 15-16 are just setup for the new functionality - they reorder and refactor existing code to allow greater re-use of existing code and easier plugin of the new code. Those marked with "X" are unchanged from V1 (as far as my git logs tell me). Those marked "+" are new patches that weren't in V1. 
+ [PATCH 01/17] conf: add missing device types to [PATCH 02/17] conf: relocate virDomainDeviceDef and X [PATCH 03/17] conf: reorder static functions in domain_conf.c + [PATCH 04/17] qemu: rename virDomainDeviceInfoPtr variables to avoid + [PATCH 05/17] conf: add device pointer to args of [PATCH 06/17] conf: make hostdev info a separate object X [PATCH 07/17] conf: HostdevDef parse/format helper functions + [PATCH 09/17] conf: put subsys part of virDomainHostdevDef into its + [PATCH 10/17] conf: hostdev utility functions + [PATCH 11/17] qemu: re-order functions in qemu_hotplug.c + [PATCH 12/17] qemu: refactor hotplug detach of hostdevs + [PATCH 15/17] conf: change virDomainNetRemove from static to global + [PATCH 16/17] qemu: use virDomainNetRemove instead of inline code Patch 8 is just a couple lines: [PATCH 08/17] conf: give each hostdevdef a parent pointer [PATCH 13/17] conf: parse/format type='hostdev' network interfaces + [PATCH 14/17] qemu: support type='hostdev' network devices at domain start + [PATCH 17/17] qemu: support type=hostdev network device live hotplug

13 years, 4 months

3
39
0 / 0

[libvirt] [PATCH] cpu: Add new flag supported by qemu to the cpu definition

by Peter Krempa

Some new cpu features were added to qemu. This patch adds some of them to our CPU map. --- to ease review, here's an excerpt from qemu.git/target-i386/cpuid.c to ease the review: static const char *ext3_feature_name[] = { "lahf_lm" /* AMD LahfSahf */, "cmp_legacy", "svm", "extapic" /* AMD ExtApicSpace */, "cr8legacy" /* AMD AltMovCr8 */, "abm", "sse4a", "misalignsse", "3dnowprefetch", "osvw", "ibs", "xop", "skinit", "wdt", NULL, NULL, "fma4", NULL, "cvt16", "nodeid_msr", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, }; (each line corresponds to one order of magnitude in the hex representation) src/cpu/cpu_map.xml | 15 +++++++++++++++ 1 files changed, 15 insertions(+), 0 deletions(-) diff --git a/src/cpu/cpu_map.xml b/src/cpu/cpu_map.xml index 3e6810f..4ce3131 100644 --- a/src/cpu/cpu_map.xml +++ b/src/cpu/cpu_map.xml @@ -236,12 +236,27 @@ <feature name='osvw'>  <cpuid function='0x80000001' ecx='0x00000200'/> </feature> + <feature name='ibs'> + <cpuid function='0x80000001' ecx='0x00000400'/> + </feature> + <feature name='xop'> + <cpuid function='0x80000001' ecx='0x00000800'/> + </feature> <feature name='skinit'>  <cpuid function='0x80000001' ecx='0x00001000'/> </feature> <feature name='wdt'> <cpuid function='0x80000001' ecx='0x00002000'/> </feature> + <feature name='fma4'> + <cpuid function='0x80000001' ecx='0x00010000'/> + </feature> + <feature name='cvt16'> + <cpuid function='0x80000001' ecx='0x00040000'/> + </feature> + <feature name='nodeid_msr'> + <cpuid function='0x80000001' ecx='0x00080000'/> + </feature>  <model name='486'> -- 1.7.3.4

13 years, 4 months

2
2
0 / 0

[libvirt] [PATCH 0/2] mingw64 build fixes

by Eric Blake

I still don't have my mingw64 cross-compile working nicely, so although I'd like to push this under the build-breaker rule, I would feel safer waiting for Marc-André's test results. Eric Blake (2): build: use correct type for pid and similar types build: fix output of pid values cfg.mk | 6 ++++++ daemon/libvirtd.c | 4 ++-- include/libvirt/libvirt-qemu.h | 4 ++-- src/conf/domain_conf.h | 2 +- src/conf/storage_conf.c | 4 ++-- src/conf/storage_conf.h | 8 ++++---- src/driver.h | 3 ++- src/libvirt-qemu.c | 17 +++++++++++------ src/probes.d | 2 +- src/qemu/qemu_command.c | 13 +++++++------ src/qemu/qemu_command.h | 4 ++-- src/qemu/qemu_driver.c | 24 ++++++++++++++---------- src/qemu/qemu_process.c | 2 +- src/qemu/qemu_process.h | 2 +- src/qemu_protocol-structs | 2 +- src/remote/qemu_protocol.x | 4 ++-- src/rpc/virnetsocket.c | 12 ++++++------ src/security/security_dac.c | 27 ++++++++++++++++----------- src/uml/uml_driver.c | 6 +++--- src/util/cgroup.c | 17 +++++++++-------- src/util/command.c | 20 +++++++++----------- src/util/virnetdev.c | 4 ++-- src/util/virnetdev.h | 2 +- src/vmware/vmware_conf.c | 10 ++++++---- tests/testutils.c | 2 +- tools/virsh.c | 10 +++++----- 26 files changed, 117 insertions(+), 94 deletions(-) -- 1.7.7.6

13 years, 4 months

2
6
0 / 0

[libvirt] [PATCH] Use the same MAC address that is defined in domain XML for attached-mac field.

by Ansis Atteka

Currently libvirt sets the attached-mac to altered MAC address that has first byte set to FE. This patch will change that behavior by using the original (unaltered) MAC address from the domain XML configuration file. --- src/network/bridge_driver.c | 2 +- src/qemu/qemu_command.c | 5 +---- src/uml/uml_conf.c | 5 +---- src/util/virnetdevtap.c | 11 ++++++++++- src/util/virnetdevtap.h | 1 + 5 files changed, 14 insertions(+), 10 deletions(-) diff --git a/src/network/bridge_driver.c b/src/network/bridge_driver.c index 8575d3e..3e1e031 100644 --- a/src/network/bridge_driver.c +++ b/src/network/bridge_driver.c @@ -1766,7 +1766,7 @@ networkStartNetworkVirtual(struct network_driver *driver, } if (virNetDevTapCreateInBridgePort(network->def->bridge, &macTapIfName, network->def->mac, - 0, false, NULL, NULL) < 0) { + false, 0, false, NULL, NULL) < 0) { VIR_FREE(macTapIfName); goto err0; } diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c index 5a34504..671054c 100644 --- a/src/qemu/qemu_command.c +++ b/src/qemu/qemu_command.c @@ -180,7 +180,6 @@ qemuNetworkIfaceConnect(virDomainDefPtr def, int tapfd = -1; int vnet_hdr = 0; bool template_ifname = false; - unsigned char tapmac[VIR_MAC_BUFLEN]; int actualType = virDomainNetGetActualType(net); if (actualType == VIR_DOMAIN_NET_TYPE_NETWORK) { @@ -244,9 +243,7 @@ qemuNetworkIfaceConnect(virDomainDefPtr def, net->model && STREQ(net->model, "virtio")) vnet_hdr = 1; - memcpy(tapmac, net->mac, VIR_MAC_BUFLEN); - tapmac[0] = 0xFE; /* Discourage bridge from using TAP dev MAC */ - err = virNetDevTapCreateInBridgePort(brname, &net->ifname, tapmac, + err = virNetDevTapCreateInBridgePort(brname, &net->ifname, net->mac, true, vnet_hdr, true, &tapfd, virDomainNetGetActualVirtPortProfile(net)); virDomainAuditNetDevice(def, net, "/dev/net/tun", tapfd >= 0); diff --git a/src/uml/uml_conf.c b/src/uml/uml_conf.c index dbbbfda..c7b29a0 100644 --- a/src/uml/uml_conf.c +++ b/src/uml/uml_conf.c @@ -127,7 +127,6 @@ 
umlConnectTapDevice(virConnectPtr conn, const char *bridge) { bool template_ifname = false; - unsigned char tapmac[VIR_MAC_BUFLEN]; if (!net->ifname || STRPREFIX(net->ifname, VIR_NET_GENERATED_PREFIX) || @@ -139,9 +138,7 @@ umlConnectTapDevice(virConnectPtr conn, template_ifname = true; } - memcpy(tapmac, net->mac, VIR_MAC_BUFLEN); - tapmac[0] = 0xFE; /* Discourage bridge from using TAP dev MAC */ - if (virNetDevTapCreateInBridgePort(bridge, &net->ifname, tapmac, + if (virNetDevTapCreateInBridgePort(bridge, &net->ifname, net->mac, true, 0, true, NULL, virDomainNetGetActualVirtPortProfile(net)) < 0) { if (template_ifname) diff --git a/src/util/virnetdevtap.c b/src/util/virnetdevtap.c index 0fce08d..868ba57 100644 --- a/src/util/virnetdevtap.c +++ b/src/util/virnetdevtap.c @@ -22,6 +22,7 @@ #include <config.h> +#include "virmacaddr.h" #include "virnetdevtap.h" #include "virnetdev.h" #include "virnetdevbridge.h" @@ -248,6 +249,7 @@ int virNetDevTapDelete(const char *ifname ATTRIBUTE_UNUSED) * @brname: the bridge name * @ifname: the interface name (or name template) * @macaddr: desired MAC address (VIR_MAC_BUFLEN long) + * @discourage: whether bridge should be discouraged from using macaddr * @vnet_hdr: whether to try enabling IFF_VNET_HDR * @tapfd: file descriptor return value for the new tap device * @virtPortProfile: bridge/port specific configuration @@ -265,11 +267,14 @@ int virNetDevTapDelete(const char *ifname ATTRIBUTE_UNUSED) int virNetDevTapCreateInBridgePort(const char *brname, char **ifname, const unsigned char *macaddr, + bool discourage, int vnet_hdr, bool up, int *tapfd, virNetDevVPortProfilePtr virtPortProfile) { + unsigned char tapmac[VIR_MAC_BUFLEN]; + if (virNetDevTapCreate(ifname, vnet_hdr, tapfd) < 0) return -1; @@ -279,7 +284,11 @@ int virNetDevTapCreateInBridgePort(const char *brname, * seeing the kernel allocate random MAC for the TAP * device before we set our static MAC. 
*/ - if (virNetDevSetMAC(*ifname, macaddr) < 0) + memcpy(tapmac, macaddr, VIR_MAC_BUFLEN); + if (discourage) + tapmac[0] = 0xFE; /* Discourage bridge from using TAP dev MAC */ + + if (virNetDevSetMAC(*ifname, tapmac) < 0) goto error; /* We need to set the interface MTU before adding it diff --git a/src/util/virnetdevtap.h b/src/util/virnetdevtap.h index 918f3dc..fc50e22 100644 --- a/src/util/virnetdevtap.h +++ b/src/util/virnetdevtap.h @@ -37,6 +37,7 @@ int virNetDevTapDelete(const char *ifname) int virNetDevTapCreateInBridgePort(const char *brname, char **ifname, const unsigned char *macaddr, + bool discourage, int vnet_hdr, bool up, int *tapfd, -- 1.7.5.4

13 years, 4 months

3
12
0 / 0

[libvirt] Downloading and wiping assumes volume is a device or file

by Wido den Hollander

Hi, I'm still working on the RBD (RADOS / Ceph) storage driver for libvirt and I noticed the virStorageVolDownload and virStorageVolWipe methods. I assumed those would be passed on to the storage backend, but they aren't. In the storageDriver the method storageVolumeDownload simply opens a file descriptor and reads the device. Until now libvirt only had support for storage drivers that presented regular files or block devices, but RBD doesn't. (Well, RBD could, but I'm currently going for Qemu-RBD). In the future we might see more storage drivers in libvirt for a project like Sheepdog as well. Sheepdog and RBD both have drivers in Qemu. What would be the way to approach this? Should the download, upload and wipe methods be moved to the storage backends? Or could there be an exception: if virStoragePoolType matches VIR_STORAGE_POOL_RBD or VIR_STORAGE_POOL_SHEEPDOG, the storage backend could be invoked instead of opening the file descriptor? Any thoughts on this? Thanks, Wido

13 years, 4 months

2
3
0 / 0

[libvirt] [PATCH 1/1] lxc: handle shutdown (and detect, but mis-handle reboot)

by Serge Hallyn

The -mm tree has Daniel Lezcano's patch changing the handling of sys_reboot in a non-init pidns. That means that, with that support, (a) it is safe to grant CAP_SYS_BOOT to a container, and (b) it's possible to distinguish between reboot and shutdown. I've implemented partial support of this for libvirt in the patch below. If Daniel's patch is not in the running kernel, then CAP_SYS_BOOT will be dropped for the container. Otherwise, it will be kept in. When the container exits, if it was determined to be a shutdown, the container will terminate. However, I didn't know how to properly do the reboot part. The patch below shows how to detect it (and sets the static bool wantreboot to true in that case), but I didn't know quite what to do with that. It looks like the code flow between lxcControllerRun and lxcControllerMain would need to be changed a bit so that we could re-run the lxcContainerStart() without causing the monitor.serverFD (or whichever pipe sends monitor events to lxc_driver.c to trigger autodestroy) to be closed. So for now I'm sending this patch, and hoping the sorcerers on this list can hook reboot up as well, or show the best way to do it. thanks, -serge Subject: [PATCH 1/1] lxc: handle shutdown (and detect, but mis-handle reboot) If Daniel Lezcano's pidns reboot patch is in the kernel, then don't drop CAP_SYS_BOOT. When the container calls shutdown, terminate the container. This patch detects when the container wanted to reboot, but goes ahead and terminates the container because I don't know how to best structure the code to support restarting a container that wanted to reboot. 
Signed-off-by: Serge Hallyn <serge.hallyn(a)canonical.com> --- src/lxc/lxc_container.c | 13 ++++-- src/lxc/lxc_container.h | 3 +- src/lxc/lxc_controller.c | 97 ++++++++++++++++++++++++++++++++++++++++++++-- 3 files changed, 103 insertions(+), 10 deletions(-) diff --git a/src/lxc/lxc_container.c b/src/lxc/lxc_container.c index e93fda5..793cb19 100644 --- a/src/lxc/lxc_container.c +++ b/src/lxc/lxc_container.c @@ -102,6 +102,7 @@ struct __lxc_child_argv { char **ttyPaths; size_t nttyPaths; int handshakefd; + bool dropreboot; }; @@ -1216,7 +1217,7 @@ static int lxcContainerSetupMounts(virDomainDefPtr vmDef, * It removes some capabilities that could be dangerous to * host system, since they are not currently "containerized" */ -static int lxcContainerDropCapabilities(void) +static int lxcContainerDropCapabilities(bool dropreboot) { #if HAVE_CAPNG int ret; @@ -1226,11 +1227,11 @@ static int lxcContainerDropCapabilities(void) if ((ret = capng_updatev(CAPNG_DROP, CAPNG_EFFECTIVE | CAPNG_PERMITTED | CAPNG_INHERITABLE | CAPNG_BOUNDING_SET, - CAP_SYS_BOOT, /* No use of reboot */ CAP_SYS_MODULE, /* No kernel module loading */ CAP_SYS_TIME, /* No changing the clock */ CAP_AUDIT_CONTROL, /* No messing with auditing status */ CAP_MAC_ADMIN, /* No messing with LSM config */ + dropreboot ? CAP_SYS_BOOT : -1, /* No use of reboot? 
*/ -1 /* sentinal */)) < 0) { lxcError(VIR_ERR_INTERNAL_ERROR, _("Failed to remove capabilities: %d"), ret); @@ -1343,7 +1344,7 @@ static int lxcContainerChild( void *data ) } /* drop a set of root capabilities */ - if (lxcContainerDropCapabilities() < 0) + if (lxcContainerDropCapabilities(argv->dropreboot) < 0) goto cleanup; if (lxcContainerSendContinue(argv->handshakefd) < 0) { @@ -1416,6 +1417,7 @@ const char *lxcContainerGetAlt32bitArch(const char *arch) * @veths: interface names * @control: control FD to the container * @ttyPath: path of tty to set as the container console + * @dropreboot: do we need to drop CAP_SYS_BOOT * * Starts a container process by calling clone() with the namespace flags * @@ -1428,7 +1430,8 @@ int lxcContainerStart(virDomainDefPtr def, int control, int handshakefd, char **ttyPaths, - size_t nttyPaths) + size_t nttyPaths, + bool dropreboot) { pid_t pid; int cflags; @@ -1436,7 +1439,7 @@ int lxcContainerStart(virDomainDefPtr def, char *stack, *stacktop; lxc_child_argv_t args = { def, securityDriver, nveths, veths, control, - ttyPaths, nttyPaths, handshakefd}; + ttyPaths, nttyPaths, handshakefd, dropreboot}; /* allocate a stack for the container */ if (VIR_ALLOC_N(stack, stacksize) < 0) { diff --git a/src/lxc/lxc_container.h b/src/lxc/lxc_container.h index 77fb9b2..15738c8 100644 --- a/src/lxc/lxc_container.h +++ b/src/lxc/lxc_container.h @@ -56,7 +56,8 @@ int lxcContainerStart(virDomainDefPtr def, int control, int handshakefd, char **ttyPaths, - size_t nttyPaths); + size_t nttyPaths, + bool dropreboot); int lxcContainerAvailable(int features); diff --git a/src/lxc/lxc_controller.c b/src/lxc/lxc_controller.c index 8f336f5..e9aa904 100644 --- a/src/lxc/lxc_controller.c +++ b/src/lxc/lxc_controller.c @@ -708,6 +708,7 @@ ignorable_accept_errno(int errnum) } static bool quit = false; +static bool wantreboot = false; static virMutex lock; static int sigpipe[2]; @@ -721,12 +722,33 @@ static void lxcSignalChildIO(int watch ATTRIBUTE_UNUSED, int 
events ATTRIBUTE_UNUSED, void *opaque) { char buf[1]; - int ret; + int ret, status; int *container = opaque; ignore_value(read(sigpipe[0], buf, 1)); - ret = waitpid(-1, NULL, WNOHANG); + ret = waitpid(-1, &status, WNOHANG); if (ret == *container) { + if (WIFSIGNALED(status)) { + switch(WTERMSIG(status)) { + case SIGINT: /* halt */ + VIR_DEBUG("XXX Container halting"); + virMutexLock(&lock); + quit = true; + virMutexUnlock(&lock); + VIR_DEBUG("XXX set quit to true"); + return; + case SIGHUP: /* reboot */ + VIR_DEBUG("XXX Container rebooting"); + virMutexLock(&lock); + wantreboot = true; + virMutexUnlock(&lock); + VIR_DEBUG("XXX set wantreboot true (i'm pid %d)", getpid()); + return; + default: + VIR_DEBUG("XXX unknown exit status for init: %d\n", WTERMSIG(status)); + break; + } + } virMutexLock(&lock); quit = true; virMutexUnlock(&lock); @@ -1082,6 +1104,62 @@ error: virMutexUnlock(&lock); } +#include <sys/reboot.h> +#include <linux/reboot.h> + +/* + * reboot(LINUX_REBOOT_CMD_CAD_ON) will return -EINVAL + * in a child pid namespace if container reboot support exists. + * Otherwise, it will either succeed or return -EPERM. + */ +static int container_reboot_supported(void *arg) +{ + int *cmd = arg; + int ret; + + ret = reboot(*cmd); + if (ret == -1 && errno == EINVAL) + return 1; + return 0; +} + +static int container_reboot_is_supported(void) +{ + FILE *f = fopen("/proc/sys/kernel/ctrl-alt-del", "r"); + int ret, cmd, v; + long stack_size = 4096; + void *stack = alloca(stack_size) + stack_size; + int status; + pid_t pid; + + if (!f) { + VIR_DEBUG("failed to open /proc/sys/kernel/ctrl-alt-del"); + return 0; + } + + ret = fscanf(f, "%d", &v); + fclose(f); + if (ret != 1) { + VIR_DEBUG("Failed to read /proc/sys/kernel/ctrl-alt-del"); + return 0; + } + cmd = v ? 
LINUX_REBOOT_CMD_CAD_ON : LINUX_REBOOT_CMD_CAD_OFF; + + pid = clone(container_reboot_supported, stack, CLONE_NEWPID | SIGCHLD, &cmd); + if (pid < 0) { + VIR_DEBUG("failed to clone\n"); + return 0; + } + if (wait(&status) < 0) { + VIR_DEBUG("unexpected wait error: %m\n"); + return 0; + } + + if (WEXITSTATUS(status) != 1) + return 0; + + return 1; +} /** * lxcControllerMain @@ -1214,13 +1292,19 @@ static int lxcControllerMain(int serverFd, } virMutexLock(&lock); - while (!quit) { + while (!quit && !wantreboot) { virMutexUnlock(&lock); if (virEventRunDefaultImpl() < 0) goto cleanup; virMutexLock(&lock); } virMutexUnlock(&lock); + VIR_DEBUG("XXX (pid %d) container is done", getpid()); + if (wantreboot) { + rc = 0; + VIR_DEBUG("XXX (pid %d) wantreboot is true", getpid()); + //goto cleanup2; + } err = virGetLastError(); if (!err || err->code == VIR_ERR_OK) @@ -1385,6 +1469,7 @@ lxcControllerRun(virDomainDefPtr def, size_t nloopDevs = 0; int *loopDevs = NULL; size_t i; + bool dropreboot = true; if (VIR_ALLOC_N(containerTtyFDs, nttyFDs) < 0) { virReportOOMError(); @@ -1542,6 +1627,7 @@ lxcControllerRun(virDomainDefPtr def, if (lxcSetPersonality(def) < 0) goto cleanup; + dropreboot = !container_reboot_is_supported(); if ((container = lxcContainerStart(def, securityDriver, @@ -1550,7 +1636,8 @@ lxcControllerRun(virDomainDefPtr def, control[1], containerhandshake[1], containerTtyPaths, - nttyFDs)) < 0) + nttyFDs, + dropreboot)) < 0) goto cleanup; VIR_FORCE_CLOSE(control[1]); VIR_FORCE_CLOSE(containerhandshake[1]); @@ -1603,7 +1690,9 @@ lxcControllerRun(virDomainDefPtr def, } } + VIR_DEBUG("XXX starting lxcControllerMain (i'm pid %d)\n", getpid()); rc = lxcControllerMain(monitor, client, ttyFDs, containerTtyFDs, nttyFDs, container); + VIR_DEBUG("XXX lxcControllerMain returned %d (i'm pid %d)\n", rc, getpid()); monitor = client = -1; cleanup: -- 1.7.9

13 years, 4 months

2
2
0 / 0

2025

2024

2023

2022

2021

2020

2019

2018

2017

2016

2015

2014

2013

2012

2011

2010

2009

2008

2007

2006

2005

Devel February 2012