Linux netfilter at some point inverted the meaning of '--ctdir reply' and
'--ctdir original': newer netfilter implementations now expect
'--ctdir original' where older ones expected '--ctdir reply', and vice versa.
We probe for this netfilter change by sending a UDP message over loopback
while 3 filtering rules are applied to INPUT. The probe is run two times:
once with '--ctdir original', which should only work on 'fixed' netfilter,
and once with '--ctdir reply', which should only work on 'old' netfilter.
If neither of the tests gets the data through, then the loopback device
is probably not configured correctly. If both tests get the data through,
something must be seriously wrong. In either of these two cases
no '--ctdir' will be applied to the rules.
Signed-off-by: Stefan Berger <stefanb(a)linux.vnet.ibm.com>
---
v2->v3:
- probing with --ctdir original and --ctdir reply
v1->v2:
- using virSocketAddrParseIPv4
---
src/nwfilter/nwfilter_ebiptables_driver.c | 169
++++++++++++++++++++++++++++++
1 file changed, 169 insertions(+)
Index: libvirt-acl/src/nwfilter/nwfilter_ebiptables_driver.c
===================================================================
--- libvirt-acl.orig/src/nwfilter/nwfilter_ebiptables_driver.c
+++ libvirt-acl/src/nwfilter/nwfilter_ebiptables_driver.c
@@ -27,6 +27,10 @@
#include <string.h>
#include <sys/stat.h>
#include <fcntl.h>
+#include <arpa/inet.h>
+#include <sys/select.h>
+#include <sys/time.h>
+#include <unistd.h>
#include "internal.h"
@@ -85,6 +89,17 @@ static char *iptables_cmd_path;
static char *ip6tables_cmd_path;
static char *grep_cmd_path;
+/*
+ * The meaning of '--ctdir original' and '--ctdir reply' was inverted in
+ * netfilter at some point. We probe for the behavior of the running
+ * netfilter and record the outcome of that probe here.
+ */
+enum ctdirStatus {
+ CTDIR_STATUS_UNKNOWN = 0, /* probing was inconclusive or did not run;
+ * no '--ctdir' match is added to the rules */
+ CTDIR_STATUS_CORRECTED = (1 << 0), /* only the '--ctdir original' probe
+ * received the data; the direction is
+ * inverted when building rules */
+ CTDIR_STATUS_OLD = (1 << 1), /* only the '--ctdir reply' probe received
+ * the data; the direction is used as is */
+};
+static enum ctdirStatus iptables_ctdir_corrected; /* zero-initialized, i.e.
+ * CTDIR_STATUS_UNKNOWN until the probe has stored a result */
+
#define PRINT_ROOT_CHAIN(buf, prefix, ifname) \
snprintf(buf, sizeof(buf), "libvirt-%c-%s", prefix, ifname)
#define PRINT_CHAIN(buf, prefix, ifname, suffix) \
@@ -1262,6 +1277,17 @@ iptablesEnforceDirection(int directionIn
virNWFilterRuleDefPtr rule,
virBufferPtr buf)
{
+ switch (iptables_ctdir_corrected) {
+ case CTDIR_STATUS_UNKNOWN:
+ /* could not be determined or s.th. is seriously wrong */
+ return;
+ case CTDIR_STATUS_CORRECTED:
+ directionIn = !directionIn;
+ break;
+ case CTDIR_STATUS_OLD:
+ break;
+ }
+
if (rule->tt != VIR_NWFILTER_RULE_DIRECTION_INOUT)
virBufferAsprintf(buf, " -m conntrack --ctdir %s",
(directionIn) ? "Original"
@@ -4304,6 +4330,146 @@ ebiptablesDriverTestCLITools(void)
return ret;
}
+/*
+ * Insert (@insert true) or delete (@insert false) the three probing rules
+ * of the INPUT chain for UDP traffic to @port on the loopback device:
+ *
+ *   1. accept packets in state ESTABLISHED
+ *   2. accept packets matching '--ctdir @ctdir'
+ *   3. drop everything else
+ *
+ * On insertion the rules are placed at positions 1 to 3 so that they take
+ * precedence over whatever else is in the chain.
+ *
+ * Returns the result of ebiptablesExecCLI(), or -1 if the command line
+ * could not be built.
+ */
+static int
+ebiptablesDriverProbeCtdirRules(bool insert, unsigned short port,
+                                const char *ctdir)
+{
+    static const char cmdline[] =
+        "$IPT -%c INPUT %c -i lo -p udp --dport %hu "
+        "-m state --state ESTABLISHED -j ACCEPT " CMD_SEPARATOR
+        "$IPT -%c INPUT %c -i lo -p udp --dport %hu "
+        "-m conntrack --ctdir %s -j ACCEPT " CMD_SEPARATOR
+        "$IPT -%c INPUT %c -i lo -p udp --dport %hu -j DROP";
+    virBuffer buf = VIR_BUFFER_INITIALIZER;
+    char action = insert ? 'I' : 'D';
+    int ret;
+
+    NWFILTER_SET_IPTABLES_SHELLVAR(&buf);
+    virBufferAsprintf(&buf, cmdline,
+                      action, insert ? '1' : ' ', port,
+                      action, insert ? '2' : ' ', port, ctdir,
+                      action, insert ? '3' : ' ', port);
+
+    if (virBufferError(&buf)) {
+        virReportOOMError();
+        virBufferFreeAndReset(&buf);
+        return -1;
+    }
+
+    ret = ebiptablesExecCLI(&buf, NULL, NULL);
+    virBufferFreeAndReset(&buf);
+
+    return ret;
+}
+
+/*
+ * Probe whether the running netfilter uses the 'old' or the 'corrected'
+ * meaning of '--ctdir' and store the outcome in iptables_ctdir_corrected.
+ *
+ * A UDP server socket is bound to a free port on 127.0.0.1. For each of
+ * '--ctdir original' and '--ctdir reply' the probing rules are installed,
+ * one datagram is sent to the server socket and we wait up to 200ms for
+ * it to arrive. Bit 0 of the result is set if the datagram got through
+ * with '--ctdir original', bit 1 if it got through with '--ctdir reply':
+ *
+ *   0x1 -> CTDIR_STATUS_CORRECTED
+ *   0x2 -> CTDIR_STATUS_OLD
+ *   0x0 -> no test passed -- loopback device not setup?
+ *   0x3 -> both tests passed -- s.th. is wrong
+ *
+ * In the latter two cases, and whenever probing has to be aborted, the
+ * status is CTDIR_STATUS_UNKNOWN.
+ */
+static void
+ebiptablesDriverProbeCtdir(void)
+{
+    static const char *const ctdiropts[] = { "original", "reply" };
+    static const char probeByte = '$';
+    /* ctdir option whose rules may currently be installed, NULL if none */
+    const char *installed = NULL;
+    unsigned short port;
+    int ssockfd = -1, csockfd = -1;
+    virSocketAddr saddr;
+    struct sockaddr_in *serveraddr = &saddr.data.inet4;
+    unsigned char data[10];
+    int results = 0;
+    size_t i;
+
+    /* An aborted probe must not leave a result of an earlier run behind. */
+    iptables_ctdir_corrected = CTDIR_STATUS_UNKNOWN;
+
+    if (virSocketAddrParseIPv4(&saddr, "127.0.0.1") < 0) {
+        VIR_ERROR(_("Could not parse IP address"));
+        goto cleanup;
+    }
+
+    if ((ssockfd = socket(AF_INET, SOCK_DGRAM, 0)) < 0 ||
+        (csockfd = socket(AF_INET, SOCK_DGRAM, 0)) < 0) {
+        VIR_ERROR(_("Could not open UDP socket"));
+        goto cleanup;
+    }
+
+    /* Search downwards from the highest port for one we can bind to. */
+    for (port = 0xffff; port > 1024; port--) {
+        serveraddr->sin_port = htons(port);
+        if (bind(ssockfd, (struct sockaddr *)serveraddr,
+                 sizeof(*serveraddr)) == 0)
+            break;
+    }
+    if (port == 1024) {
+        VIR_ERROR(_("Could not bind to any UDP socket"));
+        goto cleanup;
+    }
+
+    for (i = 0; i < ARRAY_CARDINALITY(ctdiropts); i++) {
+        int n;
+
+        /*
+         * Record the option before inserting: a failed insertion may
+         * still have left some of the rules behind.
+         */
+        installed = ctdiropts[i];
+        if (ebiptablesDriverProbeCtdirRules(true, port, ctdiropts[i]) < 0) {
+            VIR_ERROR(_("Could not apply iptables rules"));
+            goto cleanup;
+        }
+
+        if (sendto(csockfd, &probeByte, 1, 0, (struct sockaddr *)serveraddr,
+                   sizeof(*serveraddr)) < 0) {
+            VIR_ERROR(_("Could not send to UDP socket"));
+            goto cleanup;
+        }
+
+        do {
+            /*
+             * select() may modify the timeout and leaves the fd sets
+             * unspecified on error, so both are set up anew for every
+             * call. Otherwise a round that timed out would leave the
+             * following round with a timeout of zero.
+             */
+            struct timeval timeout = {
+                .tv_sec = 0,
+                .tv_usec = 1000 * 200,
+            };
+            fd_set readfds;
+
+            FD_ZERO(&readfds);
+            FD_SET(ssockfd, &readfds);
+
+            n = select(ssockfd + 1, &readfds, NULL, NULL, &timeout);
+        } while (n < 0 && errno == EINTR);
+
+        if (n < 0) {
+            VIR_ERROR(_("Select failed"));
+            goto cleanup;
+        }
+
+        if (n == 0) {
+            VIR_INFO("Ctdir probing received no data");
+        } else {
+            VIR_INFO("Ctdir probing received data");
+            results |= 1 << i;
+            /*
+             * The datagram has to be drained; if it stayed queued the
+             * next round would see it and report a false positive.
+             */
+            if (read(ssockfd, data, sizeof(data)) < 0) {
+                VIR_ERROR(_("Could not read from UDP socket"));
+                goto cleanup;
+            }
+        }
+
+        /* Best effort, as there is nothing more we could do on failure. */
+        ebiptablesDriverProbeCtdirRules(false, port, ctdiropts[i]);
+        installed = NULL;
+    }
+
+    switch (results) {
+    case 0x1:
+        iptables_ctdir_corrected = CTDIR_STATUS_CORRECTED;
+        break;
+    case 0x2:
+        iptables_ctdir_corrected = CTDIR_STATUS_OLD;
+        break;
+    default:
+        /*
+         * 0x0: no test passed -- loopback device not setup?
+         * 0x3: both tests passed -- s.th. is wrong
+         */
+        iptables_ctdir_corrected = CTDIR_STATUS_UNKNOWN;
+        break;
+    }
+
+cleanup:
+    if (installed)
+        ebiptablesDriverProbeCtdirRules(false, port, installed);
+    VIR_FORCE_CLOSE(ssockfd);
+    VIR_FORCE_CLOSE(csockfd);
+}
+
static int
ebiptablesDriverInit(bool privileged)
{
@@ -4341,6 +4507,9 @@ ebiptablesDriverInit(bool privileged)
return -ENOTSUP;
}
+ if (iptables_cmd_path)
+ ebiptablesDriverProbeCtdir();
+
ebiptables_driver.flags = TECHDRV_FLAG_INITIALIZED;
return 0;