This patch addresses, on PPC, the same aspects that bug 1103314 addressed
on x86.
The PCI expander bus creates multiple primary PCI busses, where each of these
busses can be assigned a specific NUMA affinity which, on x86, is
advertised through ACPI on a per-bus basis.
For SPAPR, a PHB's NUMA affinities are assigned on a per-PHB basis, and
there is no mechanism for advertising NUMA affinities to a guest on a
per-bus basis. So, even if qemu-ppc manages to get some sort of multi-bus
topology working using PXB, there is no way to expose the affinities
of these busses to the guest. They can only be exposed on a
per-PHB/per-domain basis.
So this patch enables the NUMA node tag in the pci-root controller on PPC.
The way to set the NUMA node is through the numa_node option of the
spapr-pci-host-bridge device. However, for the implicit PHB, the only way
to set the numa_node is through the -global option. The -global option
applies to all the PHBs unless the option is explicitly specified for the
respective PHB on the command line. The default PHB has only the emulated
devices, so the patch prevents setting the NUMA node for the default PHB.
Signed-off-by: Shivaprasad G Bhat <sbhat(a)linux.vnet.ibm.com>
---
docs/formatdomain.html.in | 5 ++
src/conf/domain_conf.c | 6 ++
src/qemu/qemu_command.c | 10 ++++
src/qemu/qemu_domain.c | 9 +++
...muxml2argv-spapr-pci-host-bridge-numa-node.args | 29 +++++++++++
...emuxml2argv-spapr-pci-host-bridge-numa-node.xml | 54 ++++++++++++++++++++
tests/qemuxml2argvtest.c | 3 +
7 files changed, 114 insertions(+), 2 deletions(-)
create mode 100644
tests/qemuxml2argvdata/qemuxml2argv-spapr-pci-host-bridge-numa-node.args
create mode 100644
tests/qemuxml2argvdata/qemuxml2argv-spapr-pci-host-bridge-numa-node.xml
diff --git a/docs/formatdomain.html.in b/docs/formatdomain.html.in
index c12efcf..64fe241 100644
--- a/docs/formatdomain.html.in
+++ b/docs/formatdomain.html.in
@@ -3786,6 +3786,11 @@
part of the specified NUMA node (it is up to the user of the
libvirt API to attach host devices to the correct
pci-expander-bus when assigning them to the domain).
+ On PPC64, the PCI devices can be specified to be part of a NUMA
+ node using only the pci-root controller with an optional
+ <code>&lt;node&gt;</code> subelement within the
+ <code>&lt;target&gt;</code> subelement. The PCI devices on the
+ given pci-root controller will be part of the specified NUMA node.
</dd>
<dt><code>index</code></dt>
<dd>
diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c
index 3feeccb..b8cdcd1 100644
--- a/src/conf/domain_conf.c
+++ b/src/conf/domain_conf.c
@@ -9457,6 +9457,12 @@ virDomainControllerDefParseXML(xmlNodePtr node,
goto error;
}
}
+ if (def->idx == 0 && numaNode >= 0) {
+ virReportError(VIR_ERR_XML_ERROR, "%s",
+ _("Only the PCI controller with index != 0 can "
+ "have NUMA node property specified"));
+ goto error;
+ }
if (numaNode >= 0)
def->opts.pciopts.numaNode = numaNode;
break;
diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c
index 6ac26af..83b277b 100644
--- a/src/qemu/qemu_command.c
+++ b/src/qemu/qemu_command.c
@@ -3038,6 +3038,16 @@ qemuBuildControllerDevStr(const virDomainDef *domainDef,
virBufferAsprintf(&buf, "%s,index=%d,id=%s",
modelName, def->opts.pciopts.targetIndex,
def->info.alias);
+
+ if (def->opts.pciopts.numaNode != -1) {
+ if (!virQEMUCapsGet(qemuCaps, QEMU_CAPS_SPAPR_PCI_HOST_BRIDGE_NUMA_NODE))
{
+ virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
+ _("the spapr-pci-host-bridge controller "
+ "doesn't support numa_node on this QEMU
binary"));
+ goto error;
+ }
+ virBufferAsprintf(&buf, ",numa_node=%d",
def->opts.pciopts.numaNode);
+ }
break;
case VIR_DOMAIN_CONTROLLER_MODEL_PCIE_ROOT:
case VIR_DOMAIN_CONTROLLER_MODEL_PCI_LAST:
diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c
index 464d3a1..7732fa1 100644
--- a/src/qemu/qemu_domain.c
+++ b/src/qemu/qemu_domain.c
@@ -3458,9 +3458,14 @@ qemuDomainControllerDefPostParse(virDomainControllerDefPtr cont,
* that NUMA node is configured in the guest <cpu><numa>
* array. NUMA cell id's in this array are numbered
* from 0 .. size-1.
+ *
+ * On PSeries, the NUMA node is set at the PHB.
*/
- if ((cont->model == VIR_DOMAIN_CONTROLLER_MODEL_PCI_EXPANDER_BUS ||
- cont->model == VIR_DOMAIN_CONTROLLER_MODEL_PCIE_EXPANDER_BUS) &&
+ if (((qemuDomainIsPSeries(def) &&
+ cont->type == VIR_DOMAIN_CONTROLLER_TYPE_PCI &&
+ cont->model == VIR_DOMAIN_CONTROLLER_MODEL_PCI_ROOT) ||
+ (cont->model == VIR_DOMAIN_CONTROLLER_MODEL_PCI_EXPANDER_BUS ||
+ cont->model == VIR_DOMAIN_CONTROLLER_MODEL_PCIE_EXPANDER_BUS))
&&
(int) virDomainNumaGetNodeCount(def->numa)
<= cont->opts.pciopts.numaNode) {
virReportError(VIR_ERR_XML_ERROR,
diff --git a/tests/qemuxml2argvdata/qemuxml2argv-spapr-pci-host-bridge-numa-node.args
b/tests/qemuxml2argvdata/qemuxml2argv-spapr-pci-host-bridge-numa-node.args
new file mode 100644
index 0000000..3e29d73
--- /dev/null
+++ b/tests/qemuxml2argvdata/qemuxml2argv-spapr-pci-host-bridge-numa-node.args
@@ -0,0 +1,29 @@
+LC_ALL=C \
+PATH=/bin \
+HOME=/home/test \
+USER=test \
+LOGNAME=test \
+QEMU_AUDIO_DRV=none \
+/usr/bin/qemu-system-ppc64 \
+-name QEMUGuest1 \
+-S \
+-M pseries \
+-m 2048 \
+-smp 8,sockets=3,cores=1,threads=8 \
+-numa node,nodeid=0,cpus=0-3,mem=1024 \
+-numa node,nodeid=1,cpus=4-7,mem=1024 \
+-uuid 87eedafe-eedc-4336-8130-ed9fe5dc90c8 \
+-nographic \
+-nodefaults \
+-chardev socket,id=charmonitor,path=/tmp/lib/domain--1-QEMUGuest1/monitor.sock,\
+server,nowait \
+-mon chardev=charmonitor,id=monitor,mode=readline \
+-boot c \
+-device spapr-pci-host-bridge,index=1,id=pci.1,numa_node=1 \
+-device spapr-pci-host-bridge,index=2,id=pci.2 \
+-device spapr-pci-host-bridge,index=3,id=pci.3,numa_node=0 \
+-device spapr-vscsi,id=scsi0,reg=0x2000 \
+-usb \
+-drive file=/dev/HostVG/QEMUGuest1,format=raw,if=none,id=drive-scsi0-0-0-0 \
+-device scsi-disk,bus=scsi0.0,channel=0,scsi-id=0,lun=0,\
+drive=drive-scsi0-0-0-0,id=scsi0-0-0-0
diff --git a/tests/qemuxml2argvdata/qemuxml2argv-spapr-pci-host-bridge-numa-node.xml
b/tests/qemuxml2argvdata/qemuxml2argv-spapr-pci-host-bridge-numa-node.xml
new file mode 100644
index 0000000..566879c
--- /dev/null
+++ b/tests/qemuxml2argvdata/qemuxml2argv-spapr-pci-host-bridge-numa-node.xml
@@ -0,0 +1,54 @@
+<domain type='qemu'>
+ <name>QEMUGuest1</name>
+ <uuid>87eedafe-eedc-4336-8130-ed9fe5dc90c8</uuid>
+ <memory unit='KiB'>2097152</memory>
+ <currentMemory unit='MiB'>2048</currentMemory>
+ <vcpu placement='static'>8</vcpu>
+ <numatune>
+ <memory mode='strict' nodeset='1'/>
+ </numatune>
+ <cpu>
+ <topology sockets='3' cores='1' threads='8'/>
+ <numa>
+ <cell id='0' cpus='0-3' memory='1048576'
unit='KiB'/>
+ <cell id='1' cpus='4-7' memory='1048576'
unit='KiB'/>
+ </numa>
+ </cpu>
+ <os>
+ <type arch='ppc64' machine='pseries'>hvm</type>
+ <boot dev='hd'/>
+ </os>
+ <clock offset='utc'/>
+ <on_poweroff>destroy</on_poweroff>
+ <on_reboot>restart</on_reboot>
+ <on_crash>destroy</on_crash>
+ <devices>
+ <emulator>/usr/bin/qemu-system-ppc64</emulator>
+ <disk type='block' device='disk'>
+ <driver name='qemu' type='raw'/>
+ <source dev='/dev/HostVG/QEMUGuest1'/>
+ <target dev='hda' bus='scsi'/>
+ <address type='drive' controller='0' bus='0'
target='0' unit='0'/>
+ </disk>
+ <controller type='usb' index='0'/>
+ <controller type='scsi' index='0'/>
+ <controller type='pci' index='0' model='pci-root'>
+ <target index='0'/>
+ </controller>
+ <controller type='pci' index='1' model='pci-root'>
+ <target index='1'>
+ <node>1</node>
+ </target>
+ </controller>
+ <controller type='pci' index='2' model='pci-root'>
+ <target index='2'/>
+ </controller>
+ <controller type='pci' index='3' model='pci-root'>
+ <target index='3'>
+ <node>0</node>
+ </target>
+ </controller>
+ <memballoon model='none'/>
+ <panic model='pseries'/>
+ </devices>
+</domain>
diff --git a/tests/qemuxml2argvtest.c b/tests/qemuxml2argvtest.c
index b95ea46..5cfad39 100644
--- a/tests/qemuxml2argvtest.c
+++ b/tests/qemuxml2argvtest.c
@@ -2739,6 +2739,9 @@ mymain(void)
DO_TEST_PARSE_ERROR("cpu-cache-emulate-l2", QEMU_CAPS_KVM);
DO_TEST_PARSE_ERROR("cpu-cache-passthrough3", QEMU_CAPS_KVM);
DO_TEST_PARSE_ERROR("cpu-cache-passthrough-l3", QEMU_CAPS_KVM);
+ DO_TEST("spapr-pci-host-bridge-numa-node", QEMU_CAPS_NUMA,
+ QEMU_CAPS_DEVICE_SPAPR_PCI_HOST_BRIDGE,
+ QEMU_CAPS_SPAPR_PCI_HOST_BRIDGE_NUMA_NODE);
if (getenv("LIBVIRT_SKIP_CLEANUP") == NULL)
virFileDeleteTree(fakerootdir);