This patch addresses the same aspects on PPC that bug 1103314 addressed
on x86.
The PCI expander bus creates multiple primary PCI busses, where each of these
busses can be assigned a specific NUMA affinity, which, on x86, is
advertised through ACPI on a per-bus basis.
For SPAPR, a PHB's NUMA affinities are assigned on a per-PHB basis, and
there is no mechanism for advertising NUMA affinities to a guest on a
per-bus basis. So, even if qemu-ppc manages to get some sort of multi-bus
topology working using PXB, there is no way to expose the affinities
of these busses to the guest. It can only be exposed on a per-PHB/per-domain
basis.
So, enable the NUMA node tag in the pci-root controller on PPC.
Signed-off-by: Shivaprasad G Bhat <sbhat(a)linux.vnet.ibm.com>
---
docs/formatdomain.html.in | 5 ++
src/qemu/qemu_command.c | 25 +++++++++++-
src/qemu/qemu_domain.c | 15 ++++---
...emuxml2argv-spapr-pci-hos-bridge-numa-node.args | 26 ++++++++++++
...qemuxml2argv-spapr-pci-hos-bridge-numa-node.xml | 43 ++++++++++++++++++++
tests/qemuxml2argvtest.c | 2 +
6 files changed, 109 insertions(+), 7 deletions(-)
create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-spapr-pci-hos-bridge-numa-node.args
create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-spapr-pci-hos-bridge-numa-node.xml
diff --git a/docs/formatdomain.html.in b/docs/formatdomain.html.in
index 11b3330..ea45146 100644
--- a/docs/formatdomain.html.in
+++ b/docs/formatdomain.html.in
@@ -3492,6 +3492,11 @@
part of the specified NUMA node (it is up to the user of the
libvirt API to attach host devices to the correct
pci-expander-bus when assigning them to the domain).
+ On PPC64, the PCI devices can be specified to be part of a NUMA
+ node using only the pci-root controller with an optional
+ <code>&lt;node&gt;</code> subelement within the
+ <code>&lt;target&gt;</code> subelement. All the PCI devices of
+ the guest will be part of the specified NUMA node.
</dd>
</dl>
<p>
diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c
index 9adf0fe..ec794f0 100644
--- a/src/qemu/qemu_command.c
+++ b/src/qemu/qemu_command.c
@@ -3055,6 +3055,25 @@ qemuBuildControllerDevStr(const virDomainDef *domainDef,
return NULL;
}
+static int qemuBuildSPAPRGlobalPCIRootNodeCommandLine(virCommandPtr cmd,
+ virDomainControllerDefPtr def,
+ virQEMUCapsPtr qemuCaps)
+{
+ if (def->opts.pciopts.numaNode != -1) {
+ if (!virQEMUCapsGet(qemuCaps, QEMU_CAPS_SPAPR_PCI_HOST_BRIDGE_NUMA_NODE)) {
+ virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
+ _("the numa_node option with spapr-pci-host-bridge controller "
+ "is not supported in this QEMU binary"));
+ return -1;
+ }
+ virCommandAddArg(cmd, "-global");
+ virCommandAddArgFormat(cmd, "spapr-pci-host-bridge.numa_node=%d",
+ def->opts.pciopts.numaNode);
+ }
+
+ return 0;
+}
+
static int
qemuBuildControllerDevCommandLine(virCommandPtr cmd,
@@ -3107,8 +3126,12 @@ qemuBuildControllerDevCommandLine(virCommandPtr cmd,
/* skip pci-root/pcie-root */
if (cont->type == VIR_DOMAIN_CONTROLLER_TYPE_PCI &&
(cont->model == VIR_DOMAIN_CONTROLLER_MODEL_PCI_ROOT ||
- cont->model == VIR_DOMAIN_CONTROLLER_MODEL_PCIE_ROOT))
+ cont->model == VIR_DOMAIN_CONTROLLER_MODEL_PCIE_ROOT)) {
+ if (ARCH_IS_PPC64(def->os.arch))
+ if (qemuBuildSPAPRGlobalPCIRootNodeCommandLine(cmd, cont, qemuCaps) < 0)
+ return -1;
continue;
+ }
/* first SATA controller on Q35 machines is implicit */
if (cont->type == VIR_DOMAIN_CONTROLLER_TYPE_SATA &&
diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c
index 8cba755..b5f89a6 100644
--- a/src/qemu/qemu_domain.c
+++ b/src/qemu/qemu_domain.c
@@ -3058,12 +3058,14 @@ qemuDomainDeviceDefPostParse(virDomainDeviceDefPtr dev,
/* if a PCI expander bus has a NUMA node set, make sure
* that NUMA node is configured in the guest <cpu><numa>
* array. NUMA cell id's in this array are numbered
- * from 0 .. size-1.
+ * from 0 .. size-1. Or, on PPC, if the pci/pcie-root has the
+ * NUMA node set, do the same.
*/
- if ((cont->model == VIR_DOMAIN_CONTROLLER_MODEL_PCI_EXPANDER_BUS ||
- cont->model == VIR_DOMAIN_CONTROLLER_MODEL_PCIE_EXPANDER_BUS) &&
- (int) virDomainNumaGetNodeCount(def->numa)
- <= cont->opts.pciopts.numaNode) {
+ if (((cont->model == VIR_DOMAIN_CONTROLLER_MODEL_PCI_EXPANDER_BUS ||
+ cont->model == VIR_DOMAIN_CONTROLLER_MODEL_PCIE_EXPANDER_BUS) ||
+ (qemuDomainMachineIsPSeries(def) &&
+ cont->model == VIR_DOMAIN_CONTROLLER_MODEL_PCI_ROOT)) &&
+ (int) virDomainNumaGetNodeCount(def->numa) <= cont->opts.pciopts.numaNode) {
virReportError(VIR_ERR_XML_ERROR,
_("%s with index %d is "
"configured for a NUMA node (%d) "
@@ -3814,7 +3816,8 @@ qemuDomainDefFormatBuf(virQEMUDriverPtr driver,
}
if (pci && pci->idx == 0 &&
- pci->model == VIR_DOMAIN_CONTROLLER_MODEL_PCI_ROOT) {
+ pci->model == VIR_DOMAIN_CONTROLLER_MODEL_PCI_ROOT &&
+ pci->opts.pciopts.numaNode == -1) {
VIR_DEBUG("Removing default pci-root from domain '%s'"
" for migration compatibility", def->name);
toremove++;
diff --git a/tests/qemuxml2argvdata/qemuxml2argv-spapr-pci-hos-bridge-numa-node.args b/tests/qemuxml2argvdata/qemuxml2argv-spapr-pci-hos-bridge-numa-node.args
new file mode 100644
index 0000000..7b70cb6
--- /dev/null
+++ b/tests/qemuxml2argvdata/qemuxml2argv-spapr-pci-hos-bridge-numa-node.args
@@ -0,0 +1,26 @@
+LC_ALL=C \
+PATH=/bin \
+HOME=/home/test \
+USER=test \
+LOGNAME=test \
+QEMU_AUDIO_DRV=none \
+/usr/bin/qemu-system-ppc64 \
+-name QEMUGuest1 \
+-S \
+-M pseries \
+-m 2048 \
+-smp 8,sockets=3,cores=1,threads=8 \
+-numa node,nodeid=0,cpus=0-3,mem=1024 \
+-numa node,nodeid=1,cpus=4-7,mem=1024 \
+-uuid 87eedafe-eedc-4336-8130-ed9fe5dc90c8 \
+-nographic \
+-nodefaults \
+-monitor unix:/tmp/lib/domain--1-QEMUGuest1/monitor.sock,server,nowait \
+-no-acpi \
+-boot c \
+-global spapr-pci-host-bridge.numa_node=1 \
+-device spapr-vscsi,id=scsi0,reg=0x2000 \
+-usb \
+-drive file=/dev/HostVG/QEMUGuest1,format=raw,if=none,id=drive-scsi0-0-0-0 \
+-device scsi-disk,bus=scsi0.0,channel=0,scsi-id=0,lun=0,\
+drive=drive-scsi0-0-0-0,id=scsi0-0-0-0
diff --git a/tests/qemuxml2argvdata/qemuxml2argv-spapr-pci-hos-bridge-numa-node.xml b/tests/qemuxml2argvdata/qemuxml2argv-spapr-pci-hos-bridge-numa-node.xml
new file mode 100644
index 0000000..4dcd68b
--- /dev/null
+++ b/tests/qemuxml2argvdata/qemuxml2argv-spapr-pci-hos-bridge-numa-node.xml
@@ -0,0 +1,43 @@
+<domain type='qemu'>
+ <name>QEMUGuest1</name>
+ <uuid>87eedafe-eedc-4336-8130-ed9fe5dc90c8</uuid>
+ <memory unit='KiB'>2097152</memory>
+ <currentMemory unit='MiB'>2048</currentMemory>
+ <vcpu placement='static'>8</vcpu>
+ <numatune>
+ <memory mode='strict' nodeset='1'/>
+ </numatune>
+ <cpu>
+ <topology sockets='3' cores='1' threads='8'/>
+ <numa>
+ <cell id='0' cpus='0-3' memory='1048576' unit='KiB'/>
+ <cell id='1' cpus='4-7' memory='1048576' unit='KiB'/>
+ </numa>
+ </cpu>
+ <os>
+ <type arch='ppc64' machine='pseries'>hvm</type>
+ <boot dev='hd'/>
+ </os>
+ <clock offset='utc'/>
+ <on_poweroff>destroy</on_poweroff>
+ <on_reboot>restart</on_reboot>
+ <on_crash>destroy</on_crash>
+ <devices>
+ <emulator>/usr/bin/qemu-system-ppc64</emulator>
+ <disk type='block' device='disk'>
+ <driver name='qemu' type='raw'/>
+ <source dev='/dev/HostVG/QEMUGuest1'/>
+ <target dev='hda' bus='scsi'/>
+ <address type='drive' controller='0' bus='0' target='0' unit='0'/>
+ </disk>
+ <controller type='usb' index='0'/>
+ <controller type='scsi' index='0'/>
+ <controller type='pci' index='0' model='pci-root'>
+ <target>
+ <node>1</node>
+ </target>
+ </controller>
+ <memballoon model='none'/>
+ <panic model='pseries'/>
+ </devices>
+</domain>
diff --git a/tests/qemuxml2argvtest.c b/tests/qemuxml2argvtest.c
index d025930..8a5b96a 100644
--- a/tests/qemuxml2argvtest.c
+++ b/tests/qemuxml2argvtest.c
@@ -2219,6 +2219,8 @@ mymain(void)
QEMU_CAPS_DEVICE_DMI_TO_PCI_BRIDGE, QEMU_CAPS_MACHINE_IOMMU);
DO_TEST("cpu-hotplug-startup", QEMU_CAPS_QUERY_HOTPLUGGABLE_CPUS);
+ DO_TEST("spapr-pci-hos-bridge-numa-node", QEMU_CAPS_NUMA,
+ QEMU_CAPS_SPAPR_PCI_HOST_BRIDGE_NUMA_NODE);
qemuTestDriverFree(&driver);