On Fri, Jun 28, 2024 at 09:59:29AM -0500, Praveen K Paladugu wrote:
On 6/28/2024 2:26 AM, Purna Pavan Chandra wrote:
> Cloud-hypervisor now supports restoring with new net fds.
> Ref:
https://github.com/cloud-hypervisor/cloud-hypervisor/pull/6402
> So, pass new tap fds via SCM_RIGHTS to CH's restore api.
>
> Signed-off-by: Purna Pavan Chandra <paekkaladevi(a)linux.microsoft.com>
> ---
> src/ch/ch_driver.c | 9 +---
> src/ch/ch_monitor.c | 21 ++++++++-
> src/ch/ch_monitor.h | 2 +-
> src/ch/ch_process.c | 104 ++++++++++++++++++++++++++++++++++++--------
> 4 files changed, 107 insertions(+), 29 deletions(-)
>
> diff --git a/src/ch/ch_driver.c b/src/ch/ch_driver.c
> index fbeac60825..7857515766 100644
> --- a/src/ch/ch_driver.c
> +++ b/src/ch/ch_driver.c
> @@ -683,14 +683,9 @@ static int
> chDomainSaveAdditionalValidation(virDomainDef *vmdef)
> {
> /*
> - SAVE and RESTORE are functional only without any networking and
> - device passthrough configuration
> + SAVE and RESTORE are functional only without any host device
> + passthrough configuration
> */
Add a version-based check here as well: we should verify that the
cloud-hypervisor version is >= 40.
For reference:
https://github.com/libvirt/libvirt/commit/b2e43609fd68b84f626d63e836acd87...
Sure, I'll add this change.
Similar to chDomainSaveAdditionalValidation, consider adding a validation
step during restore as well. This is required if a user saves the
VM on one host and restores it on a different host.
Even if a user saves a VM on one host and tries to restore it on another
one (along with migrating the saved state files), the expectation is
that it would work, right?
> - if (vmdef->nnets > 0) {
> - virReportError(VIR_ERR_OPERATION_INVALID, "%s",
> - _("cannot save domain with network interfaces"));
> - return -1;
> - }
> if (vmdef->nhostdevs > 0) {
> virReportError(VIR_ERR_OPERATION_INVALID, "%s",
> _("cannot save domain with host devices"));
> diff --git a/src/ch/ch_monitor.c b/src/ch/ch_monitor.c
> index 8d8be1f46e..3af5e7f2d1 100644
> --- a/src/ch/ch_monitor.c
> +++ b/src/ch/ch_monitor.c
> @@ -978,14 +978,31 @@ virCHMonitorSaveVM(virCHMonitor *mon, const char *to)
> }
> int
> -virCHMonitorBuildRestoreJson(const char *from, char **jsonstr)
> +virCHMonitorBuildRestoreJson(virDomainDef *vmdef, const char *from, char
**jsonstr)
> {
> + size_t i;
> g_autoptr(virJSONValue) restore_json = virJSONValueNewObject();
> -
> g_autofree char *path_url = g_strdup_printf("file://%s", from);
> +
> if (virJSONValueObjectAppendString(restore_json, "source_url",
path_url))
> return -1;
> + /* Pass the netconfig needed to restore with new netfds */
> + if (vmdef->nnets) {
> + g_autoptr(virJSONValue) nets = virJSONValueNewArray();
> + for (i = 0; i < vmdef->nnets; i++) {
> + g_autoptr(virJSONValue) net_json = virJSONValueNewObject();
> + g_autofree char *id = g_strdup_printf("%s_%ld",
CH_NET_ID_PREFIX, i);
> + if (virJSONValueObjectAppendString(net_json, "id", id) <
0)
> + return -1;
> + if (virJSONValueObjectAppendNumberInt(net_json, "num_fds",
vmdef->nets[i]->driver.virtio.queues))
> + return -1;
> + virJSONValueArrayAppend(nets, &net_json);
> + }
> + if (virJSONValueObjectAppend(restore_json, "net_fds",
&nets))
> + return -1;
> + }
> +
> if (!(*jsonstr = virJSONValueToString(restore_json, false)))
> return -1;
> diff --git a/src/ch/ch_monitor.h b/src/ch/ch_monitor.h
> index 375b7a49a4..02a6685d58 100644
> --- a/src/ch/ch_monitor.h
> +++ b/src/ch/ch_monitor.h
> @@ -127,4 +127,4 @@ int virCHMonitorGetIOThreads(virCHMonitor *mon,
> virDomainIOThreadInfo ***iothreads);
> int
> virCHMonitorBuildNetJson(virDomainNetDef *netdef, int netindex, char **jsonstr);
> -int virCHMonitorBuildRestoreJson(const char *from, char **jsonstr);
> +int virCHMonitorBuildRestoreJson(virDomainDef *vmdef, const char *from, char
**jsonstr);
> diff --git a/src/ch/ch_process.c b/src/ch/ch_process.c
> index b8c6080d5c..cb3539ebe5 100644
> --- a/src/ch/ch_process.c
> +++ b/src/ch/ch_process.c
> @@ -29,6 +29,7 @@
> #include "ch_process.h"
> #include "domain_cgroup.h"
> #include "domain_interface.h"
> +#include "viralloc.h"
> #include "virerror.h"
> #include "virfile.h"
> #include "virjson.h"
> @@ -707,6 +708,56 @@ chProcessAddNetworkDevices(virCHDriver *driver,
> return 0;
> }
> +/**
> + * virCHRestoreCreateNetworkDevices:
> + * @driver: pointer to driver structure
> + * @vmdef: pointer to domain definition
> + * @vmtapfds: returned array of FDs of guest interfaces
> + * @nvmtapfds: returned number of FDs of guest interfaces
> + * @nicindexes: returned array of network indexes
> + * @nnicindexes: returned number of network indexes
> + *
> + * Create network devices for the domain. This function is called during
> + * domain restore.
> + *
> + * Returns 0 on success or -1 in case of error
> +*/
> +static int
> +virCHRestoreCreateNetworkDevices(virCHDriver *driver,
> + virDomainDef *vmdef,
> + int **vmtapfds,
> + size_t *nvmtapfds,
> + int **nicindexes,
> + size_t *nnicindexes)
> +{
> + size_t i, j;
> + size_t tapfd_len;
> + size_t index_vmtapfds;
> + for (i = 0; i < vmdef->nnets; i++) {
> + g_autofree int *tapfds = NULL;
> + tapfd_len = vmdef->nets[i]->driver.virtio.queues;
> + if (virCHDomainValidateActualNetDef(vmdef->nets[i]) < 0) {
> + virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
> + _("net definition failed validation"));
> + return -1;
> + }
> + tapfds = g_new0(int, tapfd_len);
> + memset(tapfds, -1, (tapfd_len) * sizeof(int));
> +
> + /* Connect Guest interfaces */
> + if (virCHConnetNetworkInterfaces(driver, vmdef, vmdef->nets[i], tapfds,
> + nicindexes, nnicindexes) < 0)
> + return -1;
> +
> + index_vmtapfds = *nvmtapfds;
> + VIR_EXPAND_N(*vmtapfds, *nvmtapfds, tapfd_len);
> + for (j = 0; j < tapfd_len; j++) {
> + VIR_APPEND_ELEMENT_INPLACE(*vmtapfds, index_vmtapfds, tapfds[j]);
> + }
> + }
> + return 0;
> +}
> +
> /**
> * virCHProcessStartValidate:
> * @driver: pointer to driver structure
> @@ -908,14 +959,19 @@ virCHProcessStartRestore(virCHDriver *driver, virDomainObj
*vm, const char *from
> g_autofree char *payload = NULL;
> g_autofree char *response = NULL;
> VIR_AUTOCLOSE mon_sockfd = -1;
> + g_autofree int *tapfds = NULL;
> + g_autofree int *nicindexes = NULL;
> size_t payload_len;
> + size_t ntapfds = 0;
> + size_t nnicindexes = 0;
> + int ret = -1;
> if (!priv->monitor) {
> /* Get the first monitor connection if not already */
> if (!(priv->monitor = virCHProcessConnectMonitor(driver, vm))) {
> virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
> _("failed to create connection to CH
socket"));
> - return -1;
> + goto cleanup;
You can continue returning -1 here, as the tapfds have not been created yet.
The next commit, "ch: kill CH process if restore fails", kills the process
in case of any errors during restore. Hence, all failures after creating
priv->monitor need to do 'goto cleanup'. But I guess this is not the
correct commit in which to make this change. I'll move this change to the
above-mentioned commit.
>
> > }
> > }
> > @@ -923,20 +979,10 @@ virCHProcessStartRestore(virCHDriver *driver, virDomainObj
*vm, const char *from
> > vm->def->id = vm->pid;
> > priv->machineName = virCHDomainGetMachineName(vm);
> > - /* Pass 0, NULL as restore only works without networking support */
> > - if (virDomainCgroupSetupCgroup("ch", vm,
> > - 0, NULL, /* nnicindexes, nicindexes */
> > - &priv->cgroup,
> > - cfg->cgroupControllers,
> > - 0, /*maxThreadsPerProc*/
> > - priv->driver->privileged,
> > - priv->machineName) < 0)
> > - return -1;
> > -
> > - if (virCHMonitorBuildRestoreJson(from, &payload) < 0) {
> > + if (virCHMonitorBuildRestoreJson(vm->def, from, &payload) < 0) {
> > virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
> > _("failed to restore domain"));
> > - return -1;
> > + goto cleanup;
>
> Same as above comment.
>
> > }
> > virBufferAddLit(&http_headers, "PUT /api/v1/vm.restore
HTTP/1.1\r\n");
> > @@ -949,21 +995,41 @@ virCHProcessStartRestore(virCHDriver *driver, virDomainObj
*vm, const char *from
> > payload = virBufferContentAndReset(&buf);
> > if ((mon_sockfd = chMonitorSocketConnect(priv->monitor)) < 0)
> > - return -1;
> > + goto cleanup;
>
> Ditto
>
> > - if (virSocketSendMsgWithFDs(mon_sockfd, payload, payload_len, NULL, 0) <
0) {
> > + if (virCHRestoreCreateNetworkDevices(driver, vm->def, &tapfds,
&ntapfds, &nicindexes, &nnicindexes) < 0)
> > + goto cleanup;
> > +
> > + if (virDomainCgroupSetupCgroup("ch", vm,
> > + nnicindexes, nicindexes,
> > + &priv->cgroup,
> > + cfg->cgroupControllers,
> > + 0, /*maxThreadsPerProc*/
> > + priv->driver->privileged,
> > + priv->machineName) < 0)
> > + goto cleanup;
> > +
> > + /* Bring up netdevs before restoring vm */
> > + if (virDomainInterfaceStartDevices(vm->def) < 0)
> > + goto cleanup;
> > +
> > + if (virSocketSendMsgWithFDs(mon_sockfd, payload, payload_len, tapfds,
ntapfds) < 0) {
> > virReportSystemError(errno, "%s",
> > _("Failed to send restore request to
CH"));
> > - return -1;
> > + goto cleanup;
> > }
> > if (chSocketProcessHttpResponse(mon_sockfd) < 0)
> > - return -1;
> > + goto cleanup;
> > if (virCHProcessSetup(vm) < 0)
> > - return -1;
> > + goto cleanup;
> > virDomainObjSetState(vm, VIR_DOMAIN_PAUSED,
VIR_DOMAIN_PAUSED_FROM_SNAPSHOT);
> > + ret = 0;
> > - return 0;
> > + cleanup:
> > + if (tapfds)
> > + chCloseFDs(tapfds, ntapfds);
> > + return ret;
> > }
>
> --
> Regards,
> Praveen