In the past there have been hard-to-reproduce issues where XIVE changes caused QEMU crashes due to multi-socket interrupts, such as in [1]. Add a functional test that explicitly checks whether remote interrupts work. The test also serves as an additional multi-socket boot test, an initrd boot test, and a check that e1000e works on powernv; these are side benefits rather than the goal of the test. From docs/system/devices/net.rst: In order to check that the user mode network is working, you can ping the address 10.0.2.2 and verify that you got an address in the range 10.0.2.x from the QEMU virtual DHCP server. Hence the test pings 10.0.2.2. [1]: https://lore.kernel.org/qemu-devel/baf6c854-832b-4a2e-922f-d34e6dadf821@redh... Tested-by: Shivang Upadhyay <shivangu@linux.ibm.com> Reviewed-by: Shivang Upadhyay <shivangu@linux.ibm.com> Signed-off-by: Aditya Gupta <adityag@linux.ibm.com> --- tests/functional/ppc64/test_powernv.py | 95 ++++++++++++++++++++++++++ 1 file changed, 95 insertions(+) diff --git a/tests/functional/ppc64/test_powernv.py b/tests/functional/ppc64/test_powernv.py index 0ea6c93e4287..9760f58ee41e 100755 --- a/tests/functional/ppc64/test_powernv.py +++ b/tests/functional/ppc64/test_powernv.py @@ -9,6 +9,7 @@ from qemu_test import LinuxKernelTest, Asset from qemu_test import wait_for_console_pattern +from qemu_test import exec_command_and_wait_for_pattern class PowernvMachine(LinuxKernelTest): @@ -27,6 +28,17 @@ class PowernvMachine(LinuxKernelTest): 'buildroot/qemu_ppc64le_powernv8-2025.02/rootfs.ext2'), 'aee2192b692077c4bde31cb56ce474424b358f17cec323d5c94af3970c9aada2') + def shell_exec_command_check_fail(self, command): + fail_msg="Fail" + self.shell_exec_command(f"export __FAIL_MSG={fail_msg}") + + # If the exit code for the command is non 0, print fail message + command = command + " || echo $__FAIL_MSG" + exec_command_and_wait_for_pattern(self, command, '#', fail_msg) + + def shell_exec_command(self, command): + 
exec_command_and_wait_for_pattern(self, command, '#', self.panic_message) + def do_test_linux_boot(self, command_line = KERNEL_COMMON_COMMAND_LINE): self.require_accelerator("tcg") kernel_path = self.ASSET_KERNEL.fetch() @@ -71,6 +83,89 @@ def test_linux_smt_boot(self): wait_for_console_pattern(self, console_pattern, self.panic_message) wait_for_console_pattern(self, self.good_message, self.panic_message) + def test_linux_remote_interrupts(self): + self.require_accelerator("tcg") + self.set_machine('powernv') + + # Have below setup in this test: + # 1. e1000e attached to pcie.6, which is from 7th PHB, belonging to 2nd + # socket (chip 1), in a powernv boot with default 6 PHBs per socket + # 2. CPU on 2nd socket (chip 1) disabled + # 3. RX IRQ's affinity to chip 2, and TX IRQ's affinity to chip 3 + # + # Then ping is done, to generate interrupts from e1000e which should go + # to IRQ server on the remote sockets + self.vm.add_args('-smp', '4,sockets=4,threads=1') + self.vm.add_args('-netdev', 'user,id=net0') + self.vm.add_args('-device', 'e1000e,netdev=net0,bus=pcie.6') + + kernel_path = self.ASSET_KERNEL.fetch() + rootfs_path = self.ASSET_INITRD.fetch() + self.vm.set_console() + self.vm.add_args('-kernel', kernel_path, + '-drive', + f'file={rootfs_path},format=raw,if=none,id=drive0,readonly=on', + '-append', 'root=/dev/nvme0n1 console=hvc0', + '-device', 'nvme,drive=drive0,bus=pcie.2,addr=0x0,serial=1234') + self.vm.launch() + + # Wait for boot to complete + console_pattern = 'CPU maps initialized for 1 thread per core' + wait_for_console_pattern(self, console_pattern, self.panic_message) + console_pattern = 'smp: Brought up 4 nodes, 4 CPUs' + wait_for_console_pattern(self, console_pattern, self.panic_message) + wait_for_console_pattern(self, 'Run /sbin/init as init process', + self.panic_message) + + # Wait for login prompt and login as root (no password in buildroot) + wait_for_console_pattern(self, 'login:', self.panic_message) + 
exec_command_and_wait_for_pattern(self, 'root', '#', self.panic_message) + + # e1000e is connected to socket 1, disable the CPU on socket 1 + self.shell_exec_command("echo 0 > /sys/devices/system/cpu/cpu1/online") + self.shell_exec_command( + "export CPU1_STATE=$(cat /sys/devices/system/cpu/cpu1/online)") + self.shell_exec_command_check_fail("[ $CPU1_STATE -eq 0 ]") + + # RX, TX interrupts to chip/cpu 2 & 3 respectively + self.shell_exec_command( + "export RX_IRQ=$(awk '/eth0-rx/ {print $1}' /proc/interrupts | tr -d ':')") + self.shell_exec_command( + "export TX_IRQ=$(awk '/eth0-tx/ {print $1}' /proc/interrupts | tr -d ':')") + self.shell_exec_command("echo 2 > /proc/irq/$RX_IRQ/smp_affinity_list") + self.shell_exec_command("echo 3 > /proc/irq/$TX_IRQ/smp_affinity_list") + + # Capture interrupt counts before generating traffic + self.shell_exec_command( + "export RX_BEFORE=$(awk '/eth0-rx/ {print $3}' /proc/interrupts)") + self.shell_exec_command( + "export TX_BEFORE=$(awk '/eth0-tx/ {print $4}' /proc/interrupts)") + + # Wait up to 15 seconds for eth0 link to come up + self.shell_exec_command( + "c=0; while ! 
ip addr show eth0 | grep 'inet 10.0.2'; do " + "sleep 1; c=$((c+1)); [ $c -gt 15 ] && break; done") + + self.shell_exec_command_check_fail( + "ip addr show eth0 | grep 'inet 10.0.2'") + + # Generate network traffic to trigger remote interrupts + # Ping QEMU's user-mode network gateway (10.0.2.2) + self.shell_exec_command("ping -W2 -c5 10.0.2.2") + + # Show final interrupt counts to verify remote interrupts occurred + self.shell_exec_command("cat /proc/interrupts | grep eth0") + + # Verify interrupt counts increased (whether interrupts were delivered) + self.shell_exec_command( + "export RX_AFTER=$(awk '/eth0-rx/ {print $3}' /proc/interrupts)") + self.shell_exec_command( + "export TX_AFTER=$(awk '/eth0-tx/ {print $4}' /proc/interrupts)") + + # Check that interrupt counts increased + self.shell_exec_command_check_fail("[ $RX_AFTER -gt $RX_BEFORE ]") + self.shell_exec_command_check_fail("[ $TX_AFTER -gt $TX_BEFORE ]") + def test_linux_big_boot(self): self.set_machine('powernv') self.vm.add_args('-smp', '16,threads=4,cores=2,sockets=2') -- 2.54.0