[libvirt] [PATCH 0/2] Fix a migration crasher

So I've tried to migrate a guest recently (to answer a needinfo for some bug of mine). Along the way, I've run into many problems and bugs we have (apparently nobody has tried migrations lately). Here's the first round of patches. The second round will require a proper deep copy function for the virDomainDef struct.

Michal Privoznik (2):
  qemuBuildMemoryBackendStr: Don't crash if no hugetlbfs is mounted
  qemu: Introduce qemuGetHupageMemPath

 src/qemu/qemu_command.c | 57 +++++--------------------------------------------
 src/qemu/qemu_conf.c    | 50 +++++++++++++++++++++++++++++++++++++++
 src/qemu/qemu_conf.h    |  4 ++++
 3 files changed, 59 insertions(+), 52 deletions(-)

--
2.8.4

[libvirt] [PATCH 1/2] qemuBuildMemoryBackendStr: Don't crash if no hugetlbfs is mounted

When trying to migrate a huge page enabled guest, I've noticed the
following crash. Apparently, if no specific hugepages are requested:

  <memoryBacking>
    <hugepages/>
  </memoryBacking>

and there are no hugepages configured on the destination, we try to
dereference a NULL pointer.

Program received signal SIGSEGV, Segmentation fault.
0x00007fcc907fb20e in qemuGetHugepagePath (hugepage=0x0) at qemu/qemu_conf.c:1447
1447        if (virAsprintf(&ret, "%s/libvirt/qemu", hugepage->mnt_dir) < 0)
(gdb) bt
#0  0x00007fcc907fb20e in qemuGetHugepagePath (hugepage=0x0) at qemu/qemu_conf.c:1447
#1  0x00007fcc907fb2f5 in qemuGetDefaultHugepath (hugetlbfs=0x0, nhugetlbfs=0) at qemu/qemu_conf.c:1466
#2  0x00007fcc907b4afa in qemuBuildMemoryBackendStr (size=4194304, pagesize=0, guestNode=0, userNodeset=0x0, autoNodeset=0x0, def=0x7fcc70019070, qemuCaps=0x7fcc70004000, cfg=0x7fcc5c011800, backendType=0x7fcc95087228, backendProps=0x7fcc95087218, force=false) at qemu/qemu_command.c:3297
#3  0x00007fcc907b4f91 in qemuBuildMemoryCellBackendStr (def=0x7fcc70019070, qemuCaps=0x7fcc70004000, cfg=0x7fcc5c011800, cell=0, auto_nodeset=0x0, backendStr=0x7fcc70020360) at qemu/qemu_command.c:3413
#4  0x00007fcc907c0406 in qemuBuildNumaArgStr (cfg=0x7fcc5c011800, def=0x7fcc70019070, cmd=0x7fcc700040c0, qemuCaps=0x7fcc70004000, auto_nodeset=0x0) at qemu/qemu_command.c:7470
#5  0x00007fcc907c5fdf in qemuBuildCommandLine (driver=0x7fcc5c07b8a0, logManager=0x7fcc70003c00, def=0x7fcc70019070, monitor_chr=0x7fcc70004bb0, monitor_json=true, qemuCaps=0x7fcc70004000, migrateURI=0x7fcc700199c0 "defer", snapshot=0x0, vmop=VIR_NETDEV_VPORT_PROFILE_OP_MIGRATE_IN_START, standalone=false, enableFips=false, nodeset=0x0, nnicindexes=0x7fcc95087498, nicindexes=0x7fcc950874a0, domainLibDir=0x7fcc700047c0 "/var/lib/libvirt/qemu/domain-1-fedora") at qemu/qemu_command.c:9547
#6  0x00007fcc90807d37 in qemuProcessLaunch (conn=0x7fcc54000b10, driver=0x7fcc5c07b8a0, vm=0x7fcc70005320, asyncJob=QEMU_ASYNC_JOB_MIGRATION_IN, incoming=0x7fcc70015d70, snapshot=0x0, vmop=VIR_NETDEV_VPORT_PROFILE_OP_MIGRATE_IN_START, flags=4) at qemu/qemu_process.c:5238
#7  0x00007fcc90815233 in qemuMigrationPrepareAny (driver=0x7fcc5c07b8a0, dconn=0x7fcc54000b10, cookiein=0x7fcc70001ec0 "<qemu-migration>\n <name>fedora</name>\n <uuid>63840878-0deb-4095-97e6-fc444d9bc9fa</uuid>\n <hostname>lisa</hostname>\n <hostuuid>da1c2801-52f3-11cb-a9db-e905fe22010c</hostuuid>\n <feature name='lock"..., cookieinlen=328, cookieout=0x7fcc95087aa0, cookieoutlen=0x7fcc95087a90, def=0x7fcc950878f0, origname=0x0, st=0x0, protocol=0x7fcc908ab42f "tcp", port=49152, autoPort=true, listenAddress=0x0, nmigrate_disks=0, migrate_disks=0x0, nbdPort=0, compression=0x7fcc70000a30, flags=64) at qemu/qemu_migration.c:3752
#8  0x00007fcc908161ae in qemuMigrationPrepareDirect (driver=0x7fcc5c07b8a0, dconn=0x7fcc54000b10, cookiein=0x7fcc70001ec0 "<qemu-migration>\n <name>fedora</name>\n <uuid>63840878-0deb-4095-97e6-fc444d9bc9fa</uuid>\n <hostname>lisa</hostname>\n <hostuuid>da1c2801-52f3-11cb-a9db-e905fe22010c</hostuuid>\n <feature name='lock"..., cookieinlen=328, cookieout=0x7fcc95087aa0, cookieoutlen=0x7fcc95087a90, uri_in=0x0, uri_out=0x7fcc700008c0, def=0x7fcc950878f0, origname=0x0, listenAddress=0x0, nmigrate_disks=0, migrate_disks=0x0, nbdPort=0, compression=0x7fcc70000a30, flags=64) at qemu/qemu_migration.c:4100
#9  0x00007fcc908673f3 in qemuDomainMigratePrepare3Params (dconn=0x7fcc54000b10, params=0x7fcc70000930, nparams=1, cookiein=0x7fcc70001ec0 "<qemu-migration>\n <name>fedora</name>\n <uuid>63840878-0deb-4095-97e6-fc444d9bc9fa</uuid>\n <hostname>lisa</hostname>\n <hostuuid>da1c2801-52f3-11cb-a9db-e905fe22010c</hostuuid>\n <feature name='lock"..., cookieinlen=328, cookieout=0x7fcc95087aa0, cookieoutlen=0x7fcc95087a90, uri_out=0x7fcc700008c0, flags=64) at qemu/qemu_driver.c:12204
#10 0x00007fcc9d5499da in virDomainMigratePrepare3Params (dconn=0x7fcc54000b10, params=0x7fcc70000930, nparams=1, cookiein=0x7fcc70001ec0 "<qemu-migration>\n <name>fedora</name>\n <uuid>63840878-0deb-4095-97e6-fc444d9bc9fa</uuid>\n <hostname>lisa</hostname>\n <hostuuid>da1c2801-52f3-11cb-a9db-e905fe22010c</hostuuid>\n <feature name='lock"..., cookieinlen=328, cookieout=0x7fcc95087aa0, cookieoutlen=0x7fcc95087a90, uri_out=0x7fcc700008c0, flags=64) at libvirt-domain.c:5141
#11 0x000055c71ce1c9f5 in remoteDispatchDomainMigratePrepare3Params (server=0x55c71f0a0cc0, client=0x55c71f0bef20, msg=0x55c71f0ae460, rerr=0x7fcc95087b80, args=0x7fcc70000900, ret=0x7fcc70000af0) at remote.c:5117
#12 0x000055c71cdfbd9e in remoteDispatchDomainMigratePrepare3ParamsHelper (server=0x55c71f0a0cc0, client=0x55c71f0bef20, msg=0x55c71f0ae460, rerr=0x7fcc95087b80, args=0x7fcc70000900, ret=0x7fcc70000af0) at remote_dispatch.h:7820
#13 0x00007fcc9d5cd810 in virNetServerProgramDispatchCall (prog=0x55c71f0ad070, server=0x55c71f0a0cc0, client=0x55c71f0bef20, msg=0x55c71f0ae460) at rpc/virnetserverprogram.c:437
#14 0x00007fcc9d5cd371 in virNetServerProgramDispatch (prog=0x55c71f0ad070, server=0x55c71f0a0cc0, client=0x55c71f0bef20, msg=0x55c71f0ae460) at rpc/virnetserverprogram.c:307
#15 0x000055c71ce35903 in virNetServerProcessMsg (srv=0x55c71f0a0cc0, client=0x55c71f0bef20, prog=0x55c71f0ad070, msg=0x55c71f0ae460) at rpc/virnetserver.c:148
#16 0x000055c71ce359c3 in virNetServerHandleJob (jobOpaque=0x55c71f0aef30, opaque=0x55c71f0a0cc0) at rpc/virnetserver.c:169
#17 0x00007fcc9d47a6f3 in virThreadPoolWorker (opaque=0x55c71f0ae540) at util/virthreadpool.c:167
#18 0x00007fcc9d479c95 in virThreadHelper (data=0x55c71f0ae560) at util/virthread.c:206
#19 0x00007fcc9aabd494 in start_thread () from /lib64/libpthread.so.0
#20 0x00007fcc9a8025dd in clone () from /lib64/libc.so.6

Signed-off-by: Michal Privoznik <mprivozn@redhat.com>
---
 src/qemu/qemu_command.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c
index 038c38f..0bafc3f 100644
--- a/src/qemu/qemu_command.c
+++ b/src/qemu/qemu_command.c
@@ -3294,6 +3294,12 @@ qemuBuildMemoryBackendStr(unsigned long long size,
             if (!(mem_path = qemuGetHugepagePath(&cfg->hugetlbfs[i])))
                 goto cleanup;
         } else {
+            if (!cfg->nhugetlbfs) {
+                virReportError(VIR_ERR_INTERNAL_ERROR,
+                               "%s", _("hugetlbfs filesystem is not mounted "
+                                       "or disabled by administrator config"));
+                goto cleanup;
+            }
             if (!(mem_path = qemuGetDefaultHugepath(cfg->hugetlbfs,
                                                     cfg->nhugetlbfs)))
                 goto cleanup;
--
2.8.4
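To make the failure mode concrete, here is a minimal standalone C program that mimics the two functions from frames #0 and #1 of the backtrace. The types and bodies are simplified stand-ins, not libvirt code, but compiling and running it segfaults the same way once the mount array is empty:

#include <stdio.h>
#include <stddef.h>

/* Simplified stand-in for libvirt's virHugeTLBFS entry */
typedef struct {
    char *mnt_dir;              /* where the hugetlbfs is mounted */
    unsigned long long size;    /* page size of that mount in KiB */
    int deflt;                  /* is this the default mount? */
} virHugeTLBFS;

static char *
getHugepagePath(virHugeTLBFS *hugepage)
{
    /* mirrors qemu_conf.c:1447: crashes when hugepage == NULL */
    printf("%s/libvirt/qemu\n", hugepage->mnt_dir);
    return hugepage->mnt_dir;
}

static char *
getDefaultHugepath(virHugeTLBFS *hugetlbfs, size_t nhugetlbfs)
{
    size_t i;

    for (i = 0; i < nhugetlbfs; i++)
        if (hugetlbfs[i].deflt)
            break;

    if (i == nhugetlbfs)
        i = 0;

    /* with hugetlbfs == NULL and i == 0 this passes a NULL pointer on */
    return getHugepagePath(&hugetlbfs[i]);
}

int
main(void)
{
    /* destination host has no hugetlbfs mounts: the array is NULL/empty,
     * exactly what qemuBuildMemoryBackendStr passed in the crash above */
    getDefaultHugepath(NULL, 0);   /* SIGSEGV here without the fix */
    return 0;
}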

On Mon, Sep 19, 2016 at 07:55:47 +0200, Michal Privoznik wrote:
> When trying to migrate a huge page enabled guest, I've noticed the
> following crash. Apparently, if no specific hugepages are requested:
>
>   <memoryBacking>
>     <hugepages/>
>   </memoryBacking>
>
> and there are no hugepages configured on the destination, we try to
> dereference a NULL pointer.
>
> Program received signal SIGSEGV, Segmentation fault.
> 0x00007fcc907fb20e in qemuGetHugepagePath (hugepage=0x0) at qemu/qemu_conf.c:1447
> 1447        if (virAsprintf(&ret, "%s/libvirt/qemu", hugepage->mnt_dir) < 0)
> (gdb) bt
> #0  0x00007fcc907fb20e in qemuGetHugepagePath (hugepage=0x0) at qemu/qemu_conf.c:1447
> #1  0x00007fcc907fb2f5 in qemuGetDefaultHugepath (hugetlbfs=0x0, nhugetlbfs=0) at qemu/qemu_conf.c:1466
> #2  0x00007fcc907b4afa in qemuBuildMemoryBackendStr (size=4194304, pagesize=0, guestNode=0, userNodeset=0x0, autoNodeset=0x0, def=0x7fcc70019070, qemuCaps=0x7fcc70004000, cfg=0x7fcc5c011800, backendType=0x7fcc95087228, backendProps=0x7fcc95087218, force=false) at qemu/qemu_command.c:3297
> #3  0x00007fcc907b4f91 in qemuBuildMemoryCellBackendStr (def=0x7fcc70019070, qemuCaps=0x7fcc70004000, cfg=0x7fcc5c011800, cell=0, auto_nodeset=0x0, backendStr=0x7fcc70020360) at qemu/qemu_command.c:3413
> #4  0x00007fcc907c0406 in qemuBuildNumaArgStr (cfg=0x7fcc5c011800, def=0x7fcc70019070, cmd=0x7fcc700040c0, qemuCaps=0x7fcc70004000, auto_nodeset=0x0) at qemu/qemu_command.c:7470
> #5  0x00007fcc907c5fdf in qemuBuildCommandLine (driver=0x7fcc5c07b8a0, logManager=0x7fcc70003c00, def=0x7fcc70019070, monitor_chr=0x7fcc70004bb0, monitor_json=true, qemuCaps=0x7fcc70004000, migrateURI=0x7fcc700199c0 "defer", snapshot=0x0, vmop=VIR_NETDEV_VPORT_PROFILE_OP_MIGRATE_IN_START, standalone=false, enableFips=false, nodeset=0x0, nnicindexes=0x7fcc95087498, nicindexes=0x7fcc950874a0, domainLibDir=0x7fcc700047c0 "/var/lib/libvirt/qemu/domain-1-fedora") at qemu/qemu_command.c:9547

I think you can trim the backtrace here.

> #6  0x00007fcc90807d37 in qemuProcessLaunch (conn=0x7fcc54000b10, driver=0x7fcc5c07b8a0, vm=0x7fcc70005320, asyncJob=QEMU_ASYNC_JOB_MIGRATION_IN, incoming=0x7fcc70015d70, snapshot=0x0, vmop=VIR_NETDEV_VPORT_PROFILE_OP_MIGRATE_IN_START, flags=4) at qemu/qemu_process.c:5238
> #7  0x00007fcc90815233 in qemuMigrationPrepareAny [...] at qemu/qemu_migration.c:3752
> #8  0x00007fcc908161ae in qemuMigrationPrepareDirect [...] at qemu/qemu_migration.c:4100
> #9  0x00007fcc908673f3 in qemuDomainMigratePrepare3Params [...] at qemu/qemu_driver.c:12204

or here if you think it's extremely useful to see the migration code path.

> #10 0x00007fcc9d5499da in virDomainMigratePrepare3Params [...] at libvirt-domain.c:5141
> [...]
> #19 0x00007fcc9aabd494 in start_thread () from /lib64/libpthread.so.0
> #20 0x00007fcc9a8025dd in clone () from /lib64/libc.so.6
>
> Signed-off-by: Michal Privoznik <mprivozn@redhat.com>
> ---
>  src/qemu/qemu_command.c | 6 ++++++
>  1 file changed, 6 insertions(+)
>
> diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c
> index 038c38f..0bafc3f 100644
> --- a/src/qemu/qemu_command.c
> +++ b/src/qemu/qemu_command.c
> @@ -3294,6 +3294,12 @@ qemuBuildMemoryBackendStr(unsigned long long size,
>              if (!(mem_path = qemuGetHugepagePath(&cfg->hugetlbfs[i])))
>                  goto cleanup;
>          } else {
> +            if (!cfg->nhugetlbfs) {
> +                virReportError(VIR_ERR_INTERNAL_ERROR,
> +                               "%s", _("hugetlbfs filesystem is not mounted "
> +                                       "or disabled by administrator config"));
> +                goto cleanup;
> +            }
>              if (!(mem_path = qemuGetDefaultHugepath(cfg->hugetlbfs,
>                                                      cfg->nhugetlbfs)))
>                  goto cleanup;

I think the bug is in the qemuGetDefaultHugepath function rather than the caller:

char *
qemuGetDefaultHugepath(virHugeTLBFSPtr hugetlbfs,
                       size_t nhugetlbfs)
{
    size_t i;

    for (i = 0; i < nhugetlbfs; i++)
        if (hugetlbfs[i].deflt)
            break;

    if (i == nhugetlbfs)
        i = 0;

    return qemuGetHugepagePath(&hugetlbfs[i]);
           ^^ it dereferences the first member of the first argument even
              if you explicitly specify that the array has 0 elements.
}

If you want to go with the fix as in this patch then you'll need to add a comment to qemuGetDefaultHugepath that callers must make sure that it's called with at least one hugepage entry. Or fix qemuGetDefaultHugepath rather than working around it in the caller.

Peter
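For illustration, the alternative fix Peter is suggesting here — rejecting an empty mount list inside qemuGetDefaultHugepath itself instead of guarding in the caller — could look roughly like the sketch below. This is only an illustration of the review comment, not code from the thread:

/* Sketch: fail gracefully instead of computing &hugetlbfs[0] when the
 * caller passes an empty array (nhugetlbfs == 0, hugetlbfs == NULL). */
char *
qemuGetDefaultHugepath(virHugeTLBFSPtr hugetlbfs,
                       size_t nhugetlbfs)
{
    size_t i;

    if (nhugetlbfs == 0) {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       "%s", _("hugetlbfs filesystem is not mounted "
                               "or disabled by administrator config"));
        return NULL;
    }

    /* prefer the mount marked as default in qemu.conf */
    for (i = 0; i < nhugetlbfs; i++)
        if (hugetlbfs[i].deflt)
            break;

    /* no explicit default: fall back to the first mount */
    if (i == nhugetlbfs)
        i = 0;

    return qemuGetHugepagePath(&hugetlbfs[i]);
}

With this variant the extra NULL check in qemuBuildMemoryBackendStr would become unnecessary, since every caller would get a reported error and a NULL return instead of a crash.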

On 19.09.2016 08:32, Peter Krempa wrote:
> On Mon, Sep 19, 2016 at 07:55:47 +0200, Michal Privoznik wrote:
>> When trying to migrate a huge page enabled guest, I've noticed the
>> following crash. Apparently, if no specific hugepages are requested:
>>
>>   <memoryBacking>
>>     <hugepages/>
>>   </memoryBacking>
>>
>> and there are no hugepages configured on the destination, we try to
>> dereference a NULL pointer.
>>
>> Program received signal SIGSEGV, Segmentation fault.
>> 0x00007fcc907fb20e in qemuGetHugepagePath (hugepage=0x0) at qemu/qemu_conf.c:1447
>> [...]
>
> I think you can trim the backtrace here.

Okay,

>> ---
>>  src/qemu/qemu_command.c | 6 ++++++
>>  1 file changed, 6 insertions(+)
>>
>> [...]
>> +            if (!cfg->nhugetlbfs) {
>> +                virReportError(VIR_ERR_INTERNAL_ERROR,
>> +                               "%s", _("hugetlbfs filesystem is not mounted "
>> +                                       "or disabled by administrator config"));
>> +                goto cleanup;
>> +            }
>>              if (!(mem_path = qemuGetDefaultHugepath(cfg->hugetlbfs,
>>                                                      cfg->nhugetlbfs)))
>
> I think the bug is in the qemuGetDefaultHugepath function rather than
> the caller:
>
> char *
> qemuGetDefaultHugepath(virHugeTLBFSPtr hugetlbfs,
>                        size_t nhugetlbfs)
> {
>     size_t i;
>
>     for (i = 0; i < nhugetlbfs; i++)
>         if (hugetlbfs[i].deflt)
>             break;
>
>     if (i == nhugetlbfs)
>         i = 0;
>
>     return qemuGetHugepagePath(&hugetlbfs[i]);
>            ^^ it dereferences the first member of the first argument even
>               if you explicitly specify that the array has 0 elements.

That i = 0 assignment does not state the array size, it just picks the first element of the array if no explicitly set default huge page size is found.

> }
>
> If you want to go with the fix as in this patch then you'll need to add
> a comment to qemuGetDefaultHugepath that callers must make sure that
> it's called with at least one hugepage entry. Or fix
> qemuGetDefaultHugepath rather than working around it in the caller.

Well, after my 2nd patch, there's no other caller than the fixed one. And I think qemuGetDefaultHugepath is really designed to be just a simple function that fetches the path to huge pages for the caller. But okay, I can discard 2/2 and basically move its internals out into various other functions (like qemuGetDefaultHugepath).

Michal

On Mon, Sep 19, 2016 at 08:43:17 +0200, Michal Privoznik wrote:
> On 19.09.2016 08:32, Peter Krempa wrote:
>> On Mon, Sep 19, 2016 at 07:55:47 +0200, Michal Privoznik wrote:
>>> When trying to migrate a huge page enabled guest, I've noticed the
>>> following crash. Apparently, if no specific hugepages are requested:
>>> [...]
>>
>> I think you can trim the backtrace here.
>
> Okay,
>
>>> [...]
>>
>> I think the bug is in the qemuGetDefaultHugepath function rather than
>> the caller:
>>
>> [...]
>>
>>     return qemuGetHugepagePath(&hugetlbfs[i]);
>>            ^^ it dereferences the first member of the first argument even
>>               if you explicitly specify that the array has 0 elements.
>
> That i = 0 assignment does not state the array size, it just picks the
> first element of the array if no explicitly set default huge page size
> is found.

It still dereferences the first element in the array despite the caller stating that there are no elements in that array.

>> }
>>
>> If you want to go with the fix as in this patch then you'll need to add
>> a comment to qemuGetDefaultHugepath that callers must make sure that
>> it's called with at least one hugepage entry. Or fix
>> qemuGetDefaultHugepath rather than working around it in the caller.
>
> Well, after my 2nd patch, there's no other caller than the fixed one.

That's great.

> And I think qemuGetDefaultHugepath is really designed to be just a
> simple function that fetches the path to huge pages for the caller.
> But okay, I can

That's great, but it has unusual semantics so you need to document it.

> discard 2/2 and basically move its internals out into various other
> functions (like qemuGetDefaultHugepath).

Why?

[libvirt] [PATCH 2/2] qemu: Introduce qemuGetHupageMemPath

Now that we have two identical implementations for getting the path for
huge page backed guest memory, let's merge them into one function.

Signed-off-by: Michal Privoznik <mprivozn@redhat.com>
---
 src/qemu/qemu_command.c | 63 ++++---------------------------------------------
 src/qemu/qemu_conf.c    | 50 +++++++++++++++++++++++++++++++++++++++
 src/qemu/qemu_conf.h    |  4 ++++
 3 files changed, 59 insertions(+), 58 deletions(-)

diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c
index 0bafc3f..051a0bc 100644
--- a/src/qemu/qemu_command.c
+++ b/src/qemu/qemu_command.c
@@ -3275,35 +3275,9 @@ qemuBuildMemoryBackendStr(unsigned long long size,
     if (!(props = virJSONValueNewObject()))
         return -1;
 
-    if (pagesize || hugepage) {
-        if (pagesize) {
-            /* Now lets see, if the huge page we want to use is even mounted
-             * and ready to use */
-            for (i = 0; i < cfg->nhugetlbfs; i++) {
-                if (cfg->hugetlbfs[i].size == pagesize)
-                    break;
-            }
-
-            if (i == cfg->nhugetlbfs) {
-                virReportError(VIR_ERR_INTERNAL_ERROR,
-                               _("Unable to find any usable hugetlbfs mount for %llu KiB"),
-                               pagesize);
-                goto cleanup;
-            }
-
-            if (!(mem_path = qemuGetHugepagePath(&cfg->hugetlbfs[i])))
-                goto cleanup;
-        } else {
-            if (!cfg->nhugetlbfs) {
-                virReportError(VIR_ERR_INTERNAL_ERROR,
-                               "%s", _("hugetlbfs filesystem is not mounted "
-                                       "or disabled by administrator config"));
-                goto cleanup;
-            }
-            if (!(mem_path = qemuGetDefaultHugepath(cfg->hugetlbfs,
-                                                    cfg->nhugetlbfs)))
-                goto cleanup;
-        }
+    if (pagesize) {
+        if (qemuGetHupageMemPath(cfg, pagesize, &mem_path) < 0)
+            goto cleanup;
 
         *backendType = "memory-backend-file";
 
@@ -7272,7 +7246,6 @@ qemuBuildMemPathStr(virQEMUDriverConfigPtr cfg,
 {
     const long system_page_size = virGetSystemPageSizeKB();
     char *mem_path = NULL;
-    size_t i = 0;
 
     /*
      * No-op if hugepages were not requested.
@@ -7287,13 +7260,6 @@ qemuBuildMemPathStr(virQEMUDriverConfigPtr cfg,
     if (def->mem.hugepages[0].size == system_page_size)
         return 0;
 
-    if (!cfg->nhugetlbfs) {
-        virReportError(VIR_ERR_INTERNAL_ERROR,
-                       "%s", _("hugetlbfs filesystem is not mounted "
-                               "or disabled by administrator config"));
-        return -1;
-    }
-
     if (!virQEMUCapsGet(qemuCaps, QEMU_CAPS_MEM_PATH)) {
         virReportError(VIR_ERR_INTERNAL_ERROR,
                        _("hugepage backing not supported by '%s'"),
                        def->emulator);
         return -1;
     }
@@ -7301,27 +7267,8 @@ qemuBuildMemPathStr(virQEMUDriverConfigPtr cfg,
-    if (!def->mem.hugepages[0].size) {
-        if (!(mem_path = qemuGetDefaultHugepath(cfg->hugetlbfs,
-                                                cfg->nhugetlbfs)))
-            return -1;
-    } else {
-        for (i = 0; i < cfg->nhugetlbfs; i++) {
-            if (cfg->hugetlbfs[i].size == def->mem.hugepages[0].size)
-                break;
-        }
-
-        if (i == cfg->nhugetlbfs) {
-            virReportError(VIR_ERR_INTERNAL_ERROR,
-                           _("Unable to find any usable hugetlbfs "
-                             "mount for %llu KiB"),
-                           def->mem.hugepages[0].size);
-            return -1;
-        }
-
-        if (!(mem_path = qemuGetHugepagePath(&cfg->hugetlbfs[i])))
-            return -1;
-    }
+    if (qemuGetHupageMemPath(cfg, def->mem.hugepages[0].size, &mem_path) < 0)
+        return -1;
 
     virCommandAddArgList(cmd, "-mem-prealloc", "-mem-path", mem_path, NULL);
     VIR_FREE(mem_path);
diff --git a/src/qemu/qemu_conf.c b/src/qemu/qemu_conf.c
index dad8334..5a97e96 100644
--- a/src/qemu/qemu_conf.c
+++ b/src/qemu/qemu_conf.c
@@ -1465,3 +1465,53 @@ qemuGetDefaultHugepath(virHugeTLBFSPtr hugetlbfs,
 
     return qemuGetHugepagePath(&hugetlbfs[i]);
 }
+
+
+/**
+ * qemuGetHupageMemPath: Construct HP enabled memory backend path
+ *
+ * If no specific hugepage size is requested (@pagesize is zero),
+ * the default hugepage size is used.
+ * The resulting path is stored at @memPath.
+ *
+ * Returns 0 on success,
+ *        -1 otherwise.
+ */
+int
+qemuGetHupageMemPath(virQEMUDriverConfigPtr cfg,
+                     unsigned long long pagesize,
+                     char **memPath)
+{
+    size_t i = 0;
+
+    if (!cfg->nhugetlbfs) {
+        virReportError(VIR_ERR_INTERNAL_ERROR,
+                       "%s", _("hugetlbfs filesystem is not mounted "
+                               "or disabled by administrator config"));
+        return -1;
+    }
+
+    if (!pagesize) {
+        if (!(*memPath = qemuGetDefaultHugepath(cfg->hugetlbfs,
+                                                cfg->nhugetlbfs)))
+            return -1;
+    } else {
+        for (i = 0; i < cfg->nhugetlbfs; i++) {
+            if (cfg->hugetlbfs[i].size == pagesize)
+                break;
+        }
+
+        if (i == cfg->nhugetlbfs) {
+            virReportError(VIR_ERR_INTERNAL_ERROR,
+                           _("Unable to find any usable hugetlbfs "
+                             "mount for %llu KiB"),
+                           pagesize);
+            return -1;
+        }
+
+        if (!(*memPath = qemuGetHugepagePath(&cfg->hugetlbfs[i])))
+            return -1;
+    }
+
+    return 0;
+}
diff --git a/src/qemu/qemu_conf.h b/src/qemu/qemu_conf.h
index d8232cc..d32689a 100644
--- a/src/qemu/qemu_conf.h
+++ b/src/qemu/qemu_conf.h
@@ -335,4 +335,8 @@ int qemuTranslateSnapshotDiskSourcePool(virConnectPtr conn,
 char * qemuGetHugepagePath(virHugeTLBFSPtr hugepage);
 char * qemuGetDefaultHugepath(virHugeTLBFSPtr hugetlbfs,
                               size_t nhugetlbfs);
+
+int qemuGetHupageMemPath(virQEMUDriverConfigPtr cfg,
+                         unsigned long long pagesize,
+                         char **memPath);
 #endif /* __QEMUD_CONF_H */
--
2.8.4
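From a caller's point of view the consolidation means both lookup flavors (specific page size vs. default mount) now sit behind one call with a single failure path. The fragment below is a hypothetical caller, written only to illustrate the calling convention of the new helper; it is not part of the patch:

/* Hypothetical example: resolve the hugepage backing path for a guest,
 * where pagesize == 0 means "use the default hugetlbfs mount". */
static int
exampleResolveMemPath(virQEMUDriverConfigPtr cfg,
                      unsigned long long pagesize)
{
    char *mem_path = NULL;

    /* qemuGetHupageMemPath reports the error itself (no hugetlbfs
     * mounted at all, or no mount for the requested page size) */
    if (qemuGetHupageMemPath(cfg, pagesize, &mem_path) < 0)
        return -1;

    /* ... use mem_path, e.g. for -mem-path or memory-backend-file ... */

    VIR_FREE(mem_path);
    return 0;
}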

On Mon, Sep 19, 2016 at 07:55:48 +0200, Michal Privoznik wrote:
> Now that we have two identical implementations for getting the path for
> huge page backed guest memory, let's merge them into one function.
>
> Signed-off-by: Michal Privoznik <mprivozn@redhat.com>
> ---
>  src/qemu/qemu_command.c | 63 ++++---------------------------------------------
>  src/qemu/qemu_conf.c    | 50 +++++++++++++++++++++++++++++++++++++++
>  src/qemu/qemu_conf.h    |  4 ++++
>  3 files changed, 59 insertions(+), 58 deletions(-)

ACK

[libvirt] [PATCH v2] qemuBuildMemoryBackendStr: Don't crash if no hugetlbfs is mounted

When trying to migrate a huge page enabled guest, I've noticed the
following crash. Apparently, if no specific hugepages are requested:

  <memoryBacking>
    <hugepages/>
  </memoryBacking>

and there are no hugepages configured on the destination, we try to
dereference a NULL pointer.

Program received signal SIGSEGV, Segmentation fault.
0x00007fcc907fb20e in qemuGetHugepagePath (hugepage=0x0) at qemu/qemu_conf.c:1447
1447        if (virAsprintf(&ret, "%s/libvirt/qemu", hugepage->mnt_dir) < 0)
(gdb) bt
#0  0x00007fcc907fb20e in qemuGetHugepagePath (hugepage=0x0) at qemu/qemu_conf.c:1447
#1  0x00007fcc907fb2f5 in qemuGetDefaultHugepath (hugetlbfs=0x0, nhugetlbfs=0) at qemu/qemu_conf.c:1466
#2  0x00007fcc907b4afa in qemuBuildMemoryBackendStr (size=4194304, pagesize=0, guestNode=0, userNodeset=0x0, autoNodeset=0x0, def=0x7fcc70019070, qemuCaps=0x7fcc70004000, cfg=0x7fcc5c011800, backendType=0x7fcc95087228, backendProps=0x7fcc95087218, force=false) at qemu/qemu_command.c:3297
#3  0x00007fcc907b4f91 in qemuBuildMemoryCellBackendStr (def=0x7fcc70019070, qemuCaps=0x7fcc70004000, cfg=0x7fcc5c011800, cell=0, auto_nodeset=0x0, backendStr=0x7fcc70020360) at qemu/qemu_command.c:3413
#4  0x00007fcc907c0406 in qemuBuildNumaArgStr (cfg=0x7fcc5c011800, def=0x7fcc70019070, cmd=0x7fcc700040c0, qemuCaps=0x7fcc70004000, auto_nodeset=0x0) at qemu/qemu_command.c:7470
#5  0x00007fcc907c5fdf in qemuBuildCommandLine (driver=0x7fcc5c07b8a0, logManager=0x7fcc70003c00, def=0x7fcc70019070, monitor_chr=0x7fcc70004bb0, monitor_json=true, qemuCaps=0x7fcc70004000, migrateURI=0x7fcc700199c0 "defer", snapshot=0x0, vmop=VIR_NETDEV_VPORT_PROFILE_OP_MIGRATE_IN_START, standalone=false, enableFips=false, nodeset=0x0, nnicindexes=0x7fcc95087498, nicindexes=0x7fcc950874a0, domainLibDir=0x7fcc700047c0 "/var/lib/libvirt/qemu/domain-1-fedora") at qemu/qemu_command.c:9547

Signed-off-by: Michal Privoznik <mprivozn@redhat.com>
---
diff to v1:
- added a comment to qemuGetDefaultHugepath() as requested in the review

 src/qemu/qemu_command.c |  6 ++++++
 src/qemu/qemu_conf.c    | 10 ++++++++++
 2 files changed, 16 insertions(+)

diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c
index 038c38f..0bafc3f 100644
--- a/src/qemu/qemu_command.c
+++ b/src/qemu/qemu_command.c
@@ -3294,6 +3294,12 @@ qemuBuildMemoryBackendStr(unsigned long long size,
             if (!(mem_path = qemuGetHugepagePath(&cfg->hugetlbfs[i])))
                 goto cleanup;
         } else {
+            if (!cfg->nhugetlbfs) {
+                virReportError(VIR_ERR_INTERNAL_ERROR,
+                               "%s", _("hugetlbfs filesystem is not mounted "
+                                       "or disabled by administrator config"));
+                goto cleanup;
+            }
             if (!(mem_path = qemuGetDefaultHugepath(cfg->hugetlbfs,
                                                     cfg->nhugetlbfs)))
                 goto cleanup;
diff --git a/src/qemu/qemu_conf.c b/src/qemu/qemu_conf.c
index dad8334..901ad23 100644
--- a/src/qemu/qemu_conf.c
+++ b/src/qemu/qemu_conf.c
@@ -1450,6 +1450,16 @@ qemuGetHugepagePath(virHugeTLBFSPtr hugepage)
     return ret;
 }
 
+
+/**
+ * qemuGetDefaultHugepath:
+ *
+ * Get default hugepages path for memory backend. Does not work
+ * well with @nhugetlbfs == 0 or @hugetlbfs == NULL.
+ *
+ * Returns 0 on success,
+ *        -1 otherwise.
+ * */
 char *
 qemuGetDefaultHugepath(virHugeTLBFSPtr hugetlbfs,
                        size_t nhugetlbfs)
--
2.8.4

On Mon, Sep 19, 2016 at 11:03:30 +0200, Michal Privoznik wrote:
> When trying to migrate a huge page enabled guest, I've noticed the
> following crash. Apparently, if no specific hugepages are requested:
>
>   <memoryBacking>
>     <hugepages/>
>   </memoryBacking>
>
[...]

> ---
> diff to v1:
> - added a comment to qemuGetDefaultHugepath() as requested in the review
>
>  src/qemu/qemu_command.c |  6 ++++++
>  src/qemu/qemu_conf.c    | 10 ++++++++++
>  2 files changed, 16 insertions(+)

[...]

> diff --git a/src/qemu/qemu_conf.c b/src/qemu/qemu_conf.c
> index dad8334..901ad23 100644
> --- a/src/qemu/qemu_conf.c
> +++ b/src/qemu/qemu_conf.c
> @@ -1450,6 +1450,16 @@ qemuGetHugepagePath(virHugeTLBFSPtr hugepage)
>      return ret;
>  }
>
> +
> +/**
> + * qemuGetDefaultHugepath:
> + *
> + * Get default hugepages path for memory backend. Does not work
> + * well with @nhugetlbfs == 0 or @hugetlbfs == NULL.

'Callers must ensure that @hugetlbfs contains at least one entry.' instead of the second sentence, which doesn't really tell much. Also add a description of the arguments, since you are referring to them in the comment.

> + *
> + * Returns 0 on success,
> + *        -1 otherwise.

These will fit on a single line.

> + * */

ACK with the above issues resolved.

Peter
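For reference, a doc comment resolving both review points might read roughly as follows. This is an illustrative sketch, not the wording that was eventually committed; note that it also describes the actual char * return value, since the "0 on success, -1 otherwise" text in v2 does not match a function returning a string:

/**
 * qemuGetDefaultHugepath:
 * @hugetlbfs: array of hugetlbfs mount points from the driver config
 * @nhugetlbfs: number of entries in @hugetlbfs
 *
 * Get the default hugepage path for a memory backend. Callers must
 * ensure that @hugetlbfs contains at least one entry.
 *
 * Returns the path on success, NULL otherwise.
 */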