This patch addresses on PPC the same aspects that bug 1103314 addressed
on x86.
The PCI expander bus creates multiple primary PCI busses, where each of these
busses can be assigned a specific NUMA affinity, which, on x86, is
advertised through ACPI on a per-bus basis.
For SPAPR, a PHB's NUMA affinities are assigned on a per-PHB basis, and
there is no mechanism for advertising NUMA affinities to a guest on a
per-bus basis. So, even if qemu-ppc manages to get some sort of multi-bus
topology working using PXB, there is no way to expose the affinities
of these busses to the guest. They can only be exposed on a
per-PHB/per-domain basis.
So this patch enables the NUMA node tag in the pci-root controller on PPC.
The way to set the NUMA node is through the numa_node option of the
spapr-pci-host-bridge device. However, for the implicit PHB, the only way
to set numa_node is through the -global option. The -global option applies
to all the PHBs unless numa_node is explicitly specified on the respective
PHB on the command line. The default PHB has the emulated devices only, so
the patch prevents setting the NUMA node for the default PHB.
Signed-off-by: Shivaprasad G Bhat <sbhat(a)linux.vnet.ibm.com>
---
docs/formatdomain.html.in | 4 +-
src/conf/domain_conf.c | 9 +++
src/qemu/qemu_command.c | 10 ++++
src/qemu/qemu_domain.c | 13 ++---
.../qemuxml2argv-pseries-default-phb-numa-node.xml | 29 +++++++++++
.../qemuxml2argv-pseries-phb-numa-node.args | 28 +++++++++++
.../qemuxml2argv-pseries-phb-numa-node.xml | 41 ++++++++++++++++
tests/qemuxml2argvtest.c | 6 ++
.../qemuxml2xmlout-pseries-phb-numa-node.xml | 52 ++++++++++++++++++++
tests/qemuxml2xmltest.c | 4 ++
10 files changed, 187 insertions(+), 9 deletions(-)
create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-pseries-default-phb-numa-node.xml
create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-pseries-phb-numa-node.args
create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-pseries-phb-numa-node.xml
create mode 100644 tests/qemuxml2xmloutdata/qemuxml2xmlout-pseries-phb-numa-node.xml
diff --git a/docs/formatdomain.html.in b/docs/formatdomain.html.in
index c12efcf..04f31aa 100644
--- a/docs/formatdomain.html.in
+++ b/docs/formatdomain.html.in
@@ -3778,7 +3778,9 @@
</dd>
<dt><code>node</code></dt>
<dd>
- pci-expander-bus controllers can have an
+ Some PCI controllers (pci-expander-bus for the pc machine
+ type, pcie-expander-bus for the q35 machine type and
+ pci-root for the pseries machine type) can have an
optional <code><node></code> subelement within
the <code><target></code> subelement, which is used to
set the NUMA node reported to the guest OS for that bus - the
diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c
index 3feeccb..8c4133c 100644
--- a/src/conf/domain_conf.c
+++ b/src/conf/domain_conf.c
@@ -9457,8 +9457,15 @@ virDomainControllerDefParseXML(xmlNodePtr node,
goto error;
}
}
- if (numaNode >= 0)
+ if (numaNode >= 0) {
def->opts.pciopts.numaNode = numaNode;
+ if (def->idx == 0) {
+ virReportError(VIR_ERR_XML_ERROR, "%s",
+ _("The PCI controller with index=0 can't "
+ "be associated with a NUMA node."));
+ goto error;
+ }
+ }
break;
default:
diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c
index 6ac26af..83b277b 100644
--- a/src/qemu/qemu_command.c
+++ b/src/qemu/qemu_command.c
@@ -3038,6 +3038,16 @@ qemuBuildControllerDevStr(const virDomainDef *domainDef,
virBufferAsprintf(&buf, "%s,index=%d,id=%s",
modelName, def->opts.pciopts.targetIndex,
def->info.alias);
+
+ if (def->opts.pciopts.numaNode != -1) {
+ if (!virQEMUCapsGet(qemuCaps, QEMU_CAPS_SPAPR_PCI_HOST_BRIDGE_NUMA_NODE))
{
+ virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
+ _("the spapr-pci-host-bridge controller "
+ "doesn't support numa_node on this QEMU
binary"));
+ goto error;
+ }
+ virBufferAsprintf(&buf, ",numa_node=%d",
def->opts.pciopts.numaNode);
+ }
break;
case VIR_DOMAIN_CONTROLLER_MODEL_PCIE_ROOT:
case VIR_DOMAIN_CONTROLLER_MODEL_PCI_LAST:
diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c
index 464d3a1..8609e6c 100644
--- a/src/qemu/qemu_domain.c
+++ b/src/qemu/qemu_domain.c
@@ -3454,15 +3454,14 @@ qemuDomainControllerDefPostParse(virDomainControllerDefPtr cont,
return -1;
}
- /* if a PCI expander bus has a NUMA node set, make sure
- * that NUMA node is configured in the guest <cpu><numa>
- * array. NUMA cell id's in this array are numbered
+ /* if a PCI expander bus or pci-root on Pseries has a NUMA node
+ * set, make sure that NUMA node is configured in the guest
+ * <cpu><numa> array. NUMA cell id's in this array are numbered
* from 0 .. size-1.
*/
- if ((cont->model == VIR_DOMAIN_CONTROLLER_MODEL_PCI_EXPANDER_BUS ||
- cont->model == VIR_DOMAIN_CONTROLLER_MODEL_PCIE_EXPANDER_BUS) &&
- (int) virDomainNumaGetNodeCount(def->numa)
- <= cont->opts.pciopts.numaNode) {
+ if (cont->opts.pciopts.numaNode >= 0 &&
+ cont->opts.pciopts.numaNode >=
+ (int) virDomainNumaGetNodeCount(def->numa)) {
virReportError(VIR_ERR_XML_ERROR,
_("%s with index %d is "
"configured for a NUMA node (%d) "
diff --git a/tests/qemuxml2argvdata/qemuxml2argv-pseries-default-phb-numa-node.xml
b/tests/qemuxml2argvdata/qemuxml2argv-pseries-default-phb-numa-node.xml
new file mode 100644
index 0000000..12d277a
--- /dev/null
+++ b/tests/qemuxml2argvdata/qemuxml2argv-pseries-default-phb-numa-node.xml
@@ -0,0 +1,29 @@
+<domain type='qemu'>
+ <name>QEMUGuest1</name>
+ <uuid>1ccfd97d-5eb4-478a-bbe6-88d254c16db7</uuid>
+ <memory unit='KiB'>1048576</memory>
+ <vcpu placement='static'>24</vcpu>
+ <numatune>
+ <memnode cellid="0" mode="strict" nodeset="1"/>
+ </numatune>
+ <cpu>
+ <topology sockets='3' cores='1' threads='8'/>
+ <numa>
+ <cell id='0' cpus='0-23' memory='1048576'
unit='KiB'/>
+ </numa>
+ </cpu>
+ <os>
+ <type arch='ppc64' machine='pseries'>hvm</type>
+ </os>
+ <devices>
+ <emulator>/usr/bin/qemu-system-ppc64</emulator>
+ <!-- The default PHB (controller index 0) shouldn't be assigned a NUMA node
-->
+ <controller type='pci' index='0' model='pci-root'>
+ <target index='0'>
+ <node>0</node>
+ </target>
+ </controller>
+ <controller type='usb' model='none'/>
+ <memballoon model='none'/>
+ </devices>
+</domain>
diff --git a/tests/qemuxml2argvdata/qemuxml2argv-pseries-phb-numa-node.args
b/tests/qemuxml2argvdata/qemuxml2argv-pseries-phb-numa-node.args
new file mode 100644
index 0000000..bb569b4
--- /dev/null
+++ b/tests/qemuxml2argvdata/qemuxml2argv-pseries-phb-numa-node.args
@@ -0,0 +1,28 @@
+LC_ALL=C \
+PATH=/bin \
+HOME=/home/test \
+USER=test \
+LOGNAME=test \
+QEMU_AUDIO_DRV=none \
+/usr/bin/qemu-system-ppc64 \
+-name QEMUGuest1 \
+-S \
+-M pseries \
+-m 2048 \
+-smp 8,sockets=3,cores=1,threads=8 \
+-object memory-backend-ram,id=ram-node0,size=1073741824,host-nodes=1,\
+policy=bind \
+-numa node,nodeid=0,cpus=0-3,memdev=ram-node0 \
+-object memory-backend-ram,id=ram-node1,size=1073741824,host-nodes=2,\
+policy=bind \
+-numa node,nodeid=1,cpus=4-7,memdev=ram-node1 \
+-uuid 87eedafe-eedc-4336-8130-ed9fe5dc90c8 \
+-nographic \
+-nodefaults \
+-chardev socket,id=charmonitor,path=/tmp/lib/domain--1-QEMUGuest1/monitor.sock,\
+server,nowait \
+-mon chardev=charmonitor,id=monitor,mode=readline \
+-boot c \
+-device spapr-pci-host-bridge,index=1,id=pci.1,numa_node=1 \
+-device spapr-pci-host-bridge,index=2,id=pci.2 \
+-device spapr-pci-host-bridge,index=3,id=pci.3,numa_node=0
diff --git a/tests/qemuxml2argvdata/qemuxml2argv-pseries-phb-numa-node.xml
b/tests/qemuxml2argvdata/qemuxml2argv-pseries-phb-numa-node.xml
new file mode 100644
index 0000000..d7c08e2
--- /dev/null
+++ b/tests/qemuxml2argvdata/qemuxml2argv-pseries-phb-numa-node.xml
@@ -0,0 +1,41 @@
+<domain type='qemu'>
+ <name>QEMUGuest1</name>
+ <uuid>87eedafe-eedc-4336-8130-ed9fe5dc90c8</uuid>
+ <memory unit='KiB'>2097152</memory>
+ <vcpu placement='static'>8</vcpu>
+ <numatune>
+ <memnode cellid="0" mode="strict" nodeset="1"/>
+ <memnode cellid="1" mode="strict" nodeset="2"/>
+ </numatune>
+ <cpu>
+ <topology sockets='3' cores='1' threads='8'/>
+ <numa>
+ <cell id='0' cpus='0-3' memory='1048576'
unit='KiB'/>
+ <cell id='1' cpus='4-7' memory='1048576'
unit='KiB'/>
+ </numa>
+ </cpu>
+ <os>
+ <type arch='ppc64' machine='pseries'>hvm</type>
+ </os>
+ <devices>
+ <emulator>/usr/bin/qemu-system-ppc64</emulator>
+ <controller type='usb' model='none' index='0'/>
+ <controller type='pci' index='0' model='pci-root'>
+ <target index='0'/>
+ </controller>
+ <controller type='pci' index='1' model='pci-root'>
+ <target index='1'>
+ <node>1</node>
+ </target>
+ </controller>
+ <controller type='pci' index='2' model='pci-root'>
+ <target index='2'/>
+ </controller>
+ <controller type='pci' index='3' model='pci-root'>
+ <target index='3'>
+ <node>0</node>
+ </target>
+ </controller>
+ <memballoon model='none'/>
+ </devices>
+</domain>
diff --git a/tests/qemuxml2argvtest.c b/tests/qemuxml2argvtest.c
index b95ea46..25cfedd 100644
--- a/tests/qemuxml2argvtest.c
+++ b/tests/qemuxml2argvtest.c
@@ -1753,6 +1753,12 @@ mymain(void)
QEMU_CAPS_NODEFCONFIG,
QEMU_CAPS_DEVICE_SPAPR_PCI_HOST_BRIDGE);
DO_TEST_PARSE_ERROR("pseries-phb-wrong-target-index", NONE);
+ DO_TEST("pseries-phb-numa-node",
+ QEMU_CAPS_NUMA,
+ QEMU_CAPS_OBJECT_MEMORY_RAM,
+ QEMU_CAPS_DEVICE_SPAPR_PCI_HOST_BRIDGE,
+ QEMU_CAPS_SPAPR_PCI_HOST_BRIDGE_NUMA_NODE);
+ DO_TEST_PARSE_ERROR("pseries-default-phb-numa-node", NONE);
DO_TEST("pseries-many-devices",
QEMU_CAPS_NODEFCONFIG,
diff --git a/tests/qemuxml2xmloutdata/qemuxml2xmlout-pseries-phb-numa-node.xml
b/tests/qemuxml2xmloutdata/qemuxml2xmlout-pseries-phb-numa-node.xml
new file mode 100644
index 0000000..0d11e6a
--- /dev/null
+++ b/tests/qemuxml2xmloutdata/qemuxml2xmlout-pseries-phb-numa-node.xml
@@ -0,0 +1,52 @@
+<domain type='qemu'>
+ <name>QEMUGuest1</name>
+ <uuid>87eedafe-eedc-4336-8130-ed9fe5dc90c8</uuid>
+ <memory unit='KiB'>2097152</memory>
+ <currentMemory unit='KiB'>2097152</currentMemory>
+ <vcpu placement='static'>8</vcpu>
+ <numatune>
+ <memnode cellid='0' mode='strict' nodeset='1'/>
+ <memnode cellid='1' mode='strict' nodeset='2'/>
+ </numatune>
+ <os>
+ <type arch='ppc64' machine='pseries'>hvm</type>
+ <boot dev='hd'/>
+ </os>
+ <cpu>
+ <topology sockets='3' cores='1' threads='8'/>
+ <numa>
+ <cell id='0' cpus='0-3' memory='1048576'
unit='KiB'/>
+ <cell id='1' cpus='4-7' memory='1048576'
unit='KiB'/>
+ </numa>
+ </cpu>
+ <clock offset='utc'/>
+ <on_poweroff>destroy</on_poweroff>
+ <on_reboot>restart</on_reboot>
+ <on_crash>destroy</on_crash>
+ <devices>
+ <emulator>/usr/bin/qemu-system-ppc64</emulator>
+ <controller type='usb' index='0' model='none'/>
+ <controller type='pci' index='0' model='pci-root'>
+ <model name='spapr-pci-host-bridge'/>
+ <target index='0'/>
+ </controller>
+ <controller type='pci' index='1' model='pci-root'>
+ <model name='spapr-pci-host-bridge'/>
+ <target index='1'>
+ <node>1</node>
+ </target>
+ </controller>
+ <controller type='pci' index='2' model='pci-root'>
+ <model name='spapr-pci-host-bridge'/>
+ <target index='2'/>
+ </controller>
+ <controller type='pci' index='3' model='pci-root'>
+ <model name='spapr-pci-host-bridge'/>
+ <target index='3'>
+ <node>0</node>
+ </target>
+ </controller>
+ <memballoon model='none'/>
+ <panic model='pseries'/>
+ </devices>
+</domain>
diff --git a/tests/qemuxml2xmltest.c b/tests/qemuxml2xmltest.c
index 5e4b1d1..fdca28e 100644
--- a/tests/qemuxml2xmltest.c
+++ b/tests/qemuxml2xmltest.c
@@ -672,6 +672,10 @@ mymain(void)
DO_TEST("pseries-phb-default-missing",
QEMU_CAPS_NODEFCONFIG,
QEMU_CAPS_DEVICE_SPAPR_PCI_HOST_BRIDGE);
+ DO_TEST("pseries-phb-numa-node",
+ QEMU_CAPS_NUMA,
+ QEMU_CAPS_DEVICE_SPAPR_PCI_HOST_BRIDGE,
+ QEMU_CAPS_SPAPR_PCI_HOST_BRIDGE_NUMA_NODE);
DO_TEST("pseries-many-devices",
QEMU_CAPS_NODEFCONFIG,