Resolves issue: https://gitlab.com/libvirt/libvirt/-/issues/603 Benchmarks showed that the number of iifname jumps for each interface is the cause of this. Switched the nftables driver to a vmap (verdict map) so we can have 1 rule that jumps to the correct root input/output chain per interface. This improves throughput, because throughput decreases as the number of interface check and jump rules increases. The issue describes that interface matching works using the interface name and that the majority of the effort is the strncpy; this commit also switches nftables to an interface_index compare instead. However, just using the interface_index is not enough: the number of oif and iif jump rules still causes quite a performance issue, which the vmap solves. Split rules into separate tables: "libvirt-nwfilter-ethernet" and "libvirt-nwfilter-other" to preserve existing ebiptables firewall behavior. Reworked chain logic for clarity with root -input/-output chains per interface. Input into the VM interface is filtered in the -input chain(s), output out of the VM interface is filtered in the -output chain(s). Stuck with 2 tables for compatibility reasons with ebiptables; unifying into 1 table would break users' firewall definitions, which depend on being able to do accepts on ethernet rules (which currently get defined via ebtables) and additional filtering via the ip rules (which currently get defined via ip(6)tables). The nwfilter_nftables_driver keeps splitting the ethernet and non-ethernet (other) rules into separate tables "libvirt-nwfilter-ethernet" and "libvirt-nwfilter-other". Rewrote chain logic so it is easier to understand: input into the VM interface is filtered in the -input chain(s), output out of the VM interface is filtered in the -output chain(s). The -ethernet and -other tables follow the same style and hook in the same way. 
Simplified conntrack handling: rules with accept+conntrack are duplicated to the opposite chain for symmetric behavior, to support the existing ebiptables logic. Firewall updates continue use tmp names for atomic replacement. Unsupported nwfilter features (for now): - STP filtering - Gratuitous ARP filtering - IPSets (potential future support via nft sets) Signed-off-by: Dion Bosschieter <dionbosschieter@gmail.com> --- po/POTFILES | 2 + src/nwfilter/meson.build | 1 + src/nwfilter/nwfilter_nftables_driver.c | 2374 +++++++++++++++++++++++ src/nwfilter/nwfilter_nftables_driver.h | 28 + 4 files changed, 2405 insertions(+) create mode 100644 src/nwfilter/nwfilter_nftables_driver.c create mode 100644 src/nwfilter/nwfilter_nftables_driver.h diff --git a/po/POTFILES b/po/POTFILES index 23da794f84..fa28239104 100644 --- a/po/POTFILES +++ b/po/POTFILES @@ -162,6 +162,8 @@ src/nwfilter/nwfilter_driver.c src/nwfilter/nwfilter_ebiptables_driver.c src/nwfilter/nwfilter_gentech_driver.c src/nwfilter/nwfilter_learnipaddr.c +src/nwfilter/nwfilter_nftables_driver.c +src/nwfilter/nwfilter_tech_driver.c src/openvz/openvz_conf.c src/openvz/openvz_driver.c src/openvz/openvz_util.c diff --git a/src/nwfilter/meson.build b/src/nwfilter/meson.build index 9e8a4797c5..a94d72d570 100644 --- a/src/nwfilter/meson.build +++ b/src/nwfilter/meson.build @@ -5,6 +5,7 @@ nwfilter_driver_sources = [ 'nwfilter_dhcpsnoop.c', 'nwfilter_ebiptables_driver.c', 'nwfilter_learnipaddr.c', + 'nwfilter_nftables_driver.c', ] driver_source_files += files(nwfilter_driver_sources) diff --git a/src/nwfilter/nwfilter_nftables_driver.c b/src/nwfilter/nwfilter_nftables_driver.c new file mode 100644 index 0000000000..36a6c63f22 --- /dev/null +++ b/src/nwfilter/nwfilter_nftables_driver.c @@ -0,0 +1,2374 @@ +/* + * nwfilter_nftables_driver.c: driver for nftables on tap devices + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as 
published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library. If not, see + * <http://www.gnu.org/licenses/>. + */ + +#include <config.h> + +#include <unistd.h> +#include <sys/stat.h> +#include <fcntl.h> + +#include "internal.h" + +#include "virbuffer.h" +#include "viralloc.h" +#include "virlog.h" +#include "virerror.h" +#include "nwfilter_conf.h" +#include "nwfilter_nftables_driver.h" +#include "nwfilter_tech_driver.h" +#include "virfile.h" +#include "configmake.h" +#include "virstring.h" +#include "virfirewall.h" + +#define VIR_FROM_THIS VIR_FROM_NWFILTER + +/* define nftable root table */ +#define NF_ETHERNET_TABLE "libvirt-nwfilter-ethernet" +#define NF_OTHER_TABLE "libvirt-nwfilter-other" +#define NF_COMMENT "{ comment \"this table is managed by libvirt\"; }" +/* nftables counter can be enabled for firewalls transparency */ +#ifndef NF_COUNTER +# define NF_COUNTER 0 +#endif + +/* define chains */ +#define IN_CHAIN "postrouting" +#define OUT_CHAIN "prerouting" +#define FORWARD_CHAIN "forward" + +#define IN_IFMATCH "oif" +#define OUT_IFMATCH "iif" + +#define DEFAULT_POLICY "accept" + +#ifndef NF_TRACE +# define NF_TRACE 0 +#endif +#if NF_TRACE +# define TRACE_SETTING "meta nftrace set 1;" +#else +# define TRACE_SETTING "" +#endif + +#define CHAINSETTINGS "{ }" + +#define VMAP_IN "vmap-oif" +#define VMAP_OUT "vmap-iif" +#define VMAPSETTINGS "{ type iface_index: verdict; }" + +#define ROOT_CHAINSETTINGS(chain, defaultPolicy) \ + "{ type filter hook "chain" priority %d;" \ + " policy "defaultPolicy"; "TRACE_SETTING" }" + 
+VIR_LOG_INIT("nwfilter.nwfilter_nftables_driver"); + +/* A lookup table for translating ethernet protocol IDs to human readable + * strings. None of the human readable strings must be found as a prefix + * in another entry here (example 'ab' would be found in 'abc') to allow + * for prefix matching. + */ +static const struct ushort_map l3_protocols[] = { + USHORTMAP_ENTRY_IDX(L3_PROTO_IPV4_IDX, ETHERTYPE_IP, "ipv4"), + USHORTMAP_ENTRY_IDX(L3_PROTO_IPV6_IDX, ETHERTYPE_IPV6, "ipv6"), + USHORTMAP_ENTRY_IDX(L3_PROTO_ARP_IDX, ETHERTYPE_ARP, "arp"), + USHORTMAP_ENTRY_IDX(L3_PROTO_RARP_IDX, ETHERTYPE_REVARP, "rarp"), + USHORTMAP_ENTRY_IDX(L2_PROTO_VLAN_IDX, ETHERTYPE_VLAN, "vlan"), + USHORTMAP_ENTRY_IDX(L2_PROTO_STP_IDX, 0, "stp"), + USHORTMAP_ENTRY_IDX(L2_PROTO_MAC_IDX, 0, "mac"), + USHORTMAP_ENTRY_IDX(L3_PROTO_LAST_IDX, 0, NULL), +}; + +/* + * Given a filtername determine the protocol it is used for evaluating + * We do prefix-matching to determine the protocol. + */ +static enum l3_proto_idx +nftablesGetProtoIdxByFiltername(const char *filtername) +{ + enum l3_proto_idx idx; + + for (idx = 0; idx < L3_PROTO_LAST_IDX; idx++) { + if (STRPREFIX(filtername, l3_protocols[idx].val)) + return idx; + } + + return -1; +} + +static void nftablesCreateTable(virFirewall *fw, + virFirewallLayer layer, + const char *tableName) +{ + virFirewallCmd *fwrule = NULL; + int tablePriority = STREQ(tableName, NF_ETHERNET_TABLE) ? 
0 : 1; + + /* define table */ + virFirewallAddCmd(fw, layer, + "add", "table", "bridge", + tableName, NF_COMMENT, NULL); + + /* create vmap for iface matches */ + virFirewallAddCmd(fw, layer, "add", "map", "bridge", tableName, VMAP_IN, + VMAPSETTINGS, NULL); + virFirewallAddCmd(fw, layer, "add", "map", "bridge", tableName, VMAP_OUT, + VMAPSETTINGS, NULL); + + /* define default chains */ + fwrule = virFirewallAddCmd(fw, layer, "add", "chain", "bridge", + tableName, IN_CHAIN, NULL); + virFirewallCmdAddArgFormat(fw, fwrule, + ROOT_CHAINSETTINGS(IN_CHAIN, DEFAULT_POLICY), + tablePriority); + fwrule = virFirewallAddCmd(fw, layer, "add", "chain", "bridge", + tableName, OUT_CHAIN, NULL); + virFirewallCmdAddArgFormat(fw, fwrule, + ROOT_CHAINSETTINGS(OUT_CHAIN, DEFAULT_POLICY), + tablePriority); + + /* add the one jump rule based on the vmap */ + fwrule = virFirewallAddCmd(fw, layer, "add", "rule", "bridge", tableName, + IN_CHAIN, IN_IFMATCH, "vmap", NULL); + virFirewallCmdAddArgFormat(fw, fwrule, "@%s", VMAP_IN); + fwrule = virFirewallAddCmd(fw, layer, "add", "rule", "bridge", tableName, + OUT_CHAIN, OUT_IFMATCH, "vmap", NULL); + virFirewallCmdAddArgFormat(fw, fwrule, "@%s", VMAP_OUT); +} + +static int +nftablesHandleCreateRootTables(virFirewall *fw, + virFirewallLayer layer, + const char *const *lines, + void *opaque G_GNUC_UNUSED) +{ + bool ethernetTableDefined = false; + bool otherTableDefined = false; + size_t i; + + /* parse nft tables list output to see if tables exist */ + for (i = 0; lines[i] != NULL; i++) { + const char *line = lines[i]; + if ((line = STRSKIP(line, "table bridge ")) == NULL) { + continue; + } + + VIR_DEBUG("Considering table for comparison '%s'", lines[i]); + + /* if chain matches basechain */ + if (STRPREFIX(line, NF_ETHERNET_TABLE)) { + ethernetTableDefined = true; + } else if (STRPREFIX(line, NF_OTHER_TABLE)) { + otherTableDefined = true; + } + } + + /* if the ethernet table doesn't exist, + * we create it including the default chains*/ + if 
(!ethernetTableDefined) + nftablesCreateTable(fw, layer, NF_ETHERNET_TABLE); + /* if the non ethernet table (other) doesn't exist, + * we create it including the default chains */ + if (!otherTableDefined) + nftablesCreateTable(fw, layer, NF_OTHER_TABLE); + + return 0; +} + +static void nftablesAddCmdAction(virFirewall *fw, + virFirewallCmd *fwrule, + virNWFilterRuleActionType action) +{ + switch (action) { + case VIR_NWFILTER_RULE_ACTION_ACCEPT: + virFirewallCmdAddArg(fw, fwrule, "accept"); + break; + case VIR_NWFILTER_RULE_ACTION_DROP: + virFirewallCmdAddArg(fw, fwrule, "drop"); + break; + case VIR_NWFILTER_RULE_ACTION_REJECT: + virFirewallCmdAddArg(fw, fwrule, "drop"); + break; + case VIR_NWFILTER_RULE_ACTION_RETURN: + virFirewallCmdAddArg(fw, fwrule, "return"); + break; + case VIR_NWFILTER_RULE_ACTION_CONTINUE: + virFirewallCmdAddArg(fw, fwrule, "continue"); + break; + case VIR_NWFILTER_RULE_ACTION_LAST: + default: + virReportError(VIR_ERR_INTERNAL_ERROR, + _("Unexpected action %1$d"), action); + } +} + +static const char *nftablesGetProtocolType(int protocol) +{ + switch (protocol) { + case VIR_NWFILTER_RULE_PROTOCOL_TCP: + case VIR_NWFILTER_RULE_PROTOCOL_TCPoIPV6: + return "tcp"; + case VIR_NWFILTER_RULE_PROTOCOL_UDP: + case VIR_NWFILTER_RULE_PROTOCOL_UDPoIPV6: + return "udp"; + case VIR_NWFILTER_RULE_PROTOCOL_UDPLITE: + case VIR_NWFILTER_RULE_PROTOCOL_UDPLITEoIPV6: + return "udplite"; + case VIR_NWFILTER_RULE_PROTOCOL_ESP: + case VIR_NWFILTER_RULE_PROTOCOL_ESPoIPV6: + return "esp"; + case VIR_NWFILTER_RULE_PROTOCOL_AH: + case VIR_NWFILTER_RULE_PROTOCOL_AHoIPV6: + return "ah"; + case VIR_NWFILTER_RULE_PROTOCOL_SCTP: + case VIR_NWFILTER_RULE_PROTOCOL_SCTPoIPV6: + return "sctp"; + case VIR_NWFILTER_RULE_PROTOCOL_ICMP: + return "icmp"; + case VIR_NWFILTER_RULE_PROTOCOL_ICMPV6: + return "icmpv6"; + case VIR_NWFILTER_RULE_PROTOCOL_IGMP: + return "igmp"; + case VIR_NWFILTER_RULE_PROTOCOL_ALL: + case VIR_NWFILTER_RULE_PROTOCOL_ALLoIPV6: + return "all"; + default: + 
virReportError(VIR_ERR_INTERNAL_ERROR, + _("Unexpected protocol %1$d"), + protocol); + return ""; + } +} + +static const char * +nftablesGetIpTypeByDataType(nwItemDesc *item) +{ + return (item->datatype == DATATYPE_IPV6ADDR) ? "ip6" : "ip"; +} + +static int +nftablesHandleIPHdr(virFirewall *fw, + virFirewallCmd *fwrule, + virNWFilterVarCombIter *vars, + ipHdrDataDef *ipHdr, + bool reverseRule) +{ + char ipaddr[INET6_ADDRSTRLEN]; + char ipaddralt[INET6_ADDRSTRLEN]; + char number[VIR_INT64_STR_BUFLEN]; + const char *ip = NULL; + const char *saddr = reverseRule ? "daddr" : "saddr"; + const char *daddr = reverseRule ? "saddr" : "daddr"; + + if (HAS_ENTRY_ITEM(&ipHdr->dataSrcIPAddr)) { + ip = nftablesGetIpTypeByDataType(&ipHdr->dataSrcIPAddr); + virFirewallCmdAddArgList(fw, fwrule, ip, saddr, NULL); + + if (printDataType(vars, + ipaddr, sizeof(ipaddr), + &ipHdr->dataSrcIPAddr) < 0) + return -1; + + if (ENTRY_WANT_NEG_SIGN(&ipHdr->dataSrcIPAddr)) + virFirewallCmdAddArg(fw, fwrule, "!"); + + if (HAS_ENTRY_ITEM(&ipHdr->dataSrcIPMask)) { + if (printDataType(vars, + number, sizeof(number), + &ipHdr->dataSrcIPMask) < 0) + return -1; + + virFirewallCmdAddArgFormat(fw, fwrule, + "%s/%s", ipaddr, number); + } else { + virFirewallCmdAddArg(fw, fwrule, ipaddr); + } + } else if (HAS_ENTRY_ITEM(&ipHdr->dataSrcIPFrom)) { + ip = nftablesGetIpTypeByDataType(&ipHdr->dataSrcIPFrom); + virFirewallCmdAddArgList(fw, fwrule, ip, saddr, NULL); + + if (printDataType(vars, + ipaddr, sizeof(ipaddr), + &ipHdr->dataSrcIPFrom) < 0) + return -1; + + if (ENTRY_WANT_NEG_SIGN(&ipHdr->dataSrcIPFrom)) + virFirewallCmdAddArg(fw, fwrule, "!"); + + if (HAS_ENTRY_ITEM(&ipHdr->dataSrcIPTo)) { + + if (printDataType(vars, + ipaddralt, sizeof(ipaddralt), + &ipHdr->dataSrcIPTo) < 0) + return -1; + + virFirewallCmdAddArgFormat(fw, fwrule, + "%s-%s", ipaddr, ipaddralt); + } else { + virFirewallCmdAddArg(fw, fwrule, ipaddr); + } + } + + if (HAS_ENTRY_ITEM(&ipHdr->dataDstIPAddr)) { + ip = 
nftablesGetIpTypeByDataType(&ipHdr->dataDstIPAddr); + virFirewallCmdAddArgList(fw, fwrule, ip, daddr, NULL); + + if (printDataType(vars, + ipaddr, sizeof(ipaddr), + &ipHdr->dataDstIPAddr) < 0) + return -1; + + if (ENTRY_WANT_NEG_SIGN(&ipHdr->dataDstIPAddr)) + virFirewallCmdAddArg(fw, fwrule, "!"); + + if (HAS_ENTRY_ITEM(&ipHdr->dataDstIPMask)) { + if (printDataType(vars, + number, sizeof(number), + &ipHdr->dataDstIPMask) < 0) + return -1; + + virFirewallCmdAddArgFormat(fw, fwrule, + "%s/%s", ipaddr, number); + } else { + virFirewallCmdAddArg(fw, fwrule, ipaddr); + } + } else if (HAS_ENTRY_ITEM(&ipHdr->dataDstIPFrom)) { + ip = nftablesGetIpTypeByDataType(&ipHdr->dataDstIPFrom); + virFirewallCmdAddArgList(fw, fwrule, ip, daddr, NULL); + + if (printDataType(vars, + ipaddr, sizeof(ipaddr), + &ipHdr->dataDstIPFrom) < 0) + return -1; + + if (ENTRY_WANT_NEG_SIGN(&ipHdr->dataDstIPFrom)) + virFirewallCmdAddArg(fw, fwrule, "!"); + + if (HAS_ENTRY_ITEM(&ipHdr->dataDstIPTo)) { + if (printDataType(vars, + ipaddralt, sizeof(ipaddralt), + &ipHdr->dataDstIPTo) < 0) + return -1; + + virFirewallCmdAddArgFormat(fw, fwrule, + "%s-%s", ipaddr, ipaddralt); + } else { + virFirewallCmdAddArg(fw, fwrule, ipaddr); + } + } + + if (HAS_ENTRY_ITEM(&ipHdr->dataDSCP)) { + if (!ip) + ip = nftablesGetIpTypeByDataType(&ipHdr->dataDSCP); + + if (printDataType(vars, + number, sizeof(number), + &ipHdr->dataDSCP) < 0) + return -1; + + virFirewallCmdAddArgList(fw, fwrule, ip, "dscp", NULL); + if (ENTRY_WANT_NEG_SIGN(&ipHdr->dataDSCP)) + virFirewallCmdAddArg(fw, fwrule, "!"); + virFirewallCmdAddArgList(fw, fwrule, number, NULL); + } + + return 0; +} + +static int +nftablesHandleEthHdr(virFirewall *fw, + virFirewallCmd *fwrule, + virNWFilterVarCombIter *vars, + ethHdrDataDef *ethHdr, + bool reverseRule) +{ + char macaddr[VIR_MAC_STRING_BUFLEN]; + char macmask[VIR_MAC_STRING_BUFLEN]; + const char *saddr = reverseRule ? "daddr" : "saddr"; + const char *daddr = reverseRule ? 
"saddr" : "daddr"; + + if (HAS_ENTRY_ITEM(ðHdr->dataSrcMACAddr)) { + const char *comparison = NULL; + if (printDataType(vars, + macaddr, sizeof(macaddr), + ðHdr->dataSrcMACAddr) < 0) + return -1; + + virFirewallCmdAddArgList(fw, fwrule, "ether", saddr, NULL); + comparison = ENTRY_WANT_NEG_SIGN(ðHdr->dataSrcMACAddr) ? + "!=" : "=="; + + if (HAS_ENTRY_ITEM(ðHdr->dataSrcMACMask)) { + if (printDataType(vars, + macmask, sizeof(macmask), + ðHdr->dataSrcMACMask) < 0) + return -1; + + virFirewallCmdAddArgFormat(fw, fwrule, + "& %s %s %s", + macmask, comparison, macaddr); + } else { + virFirewallCmdAddArgList(fw, fwrule, comparison, macaddr, NULL); + } + } + + if (HAS_ENTRY_ITEM(ðHdr->dataDstMACAddr)) { + const char *comparison = NULL; + if (printDataType(vars, + macaddr, sizeof(macaddr), + ðHdr->dataDstMACAddr) < 0) + return -1; + + virFirewallCmdAddArgList(fw, fwrule, "ether", daddr, NULL); + comparison = ENTRY_WANT_NEG_SIGN(ðHdr->dataDstMACAddr) ? + "!=" : "=="; + + if (HAS_ENTRY_ITEM(ðHdr->dataDstMACMask)) { + if (printDataType(vars, + macmask, sizeof(macmask), + ðHdr->dataDstMACMask) < 0) + return -1; + + virFirewallCmdAddArgFormat(fw, fwrule, + "& %s %s %s", + macmask, comparison, macaddr); + } else { + virFirewallCmdAddArgList(fw, fwrule, comparison, macaddr, NULL); + } + } + + return 0; +} + +static int +insertRuleArg2Param(virFirewall *fw, + virFirewallCmd *fwrule, + virNWFilterVarCombIter *vars, + nwItemDesc *itemLow, + nwItemDesc *itemHigh, + const char *argument, + const char *seperator) +{ + char field[VIR_INT64_STR_BUFLEN]; + char fieldalt[VIR_INT64_STR_BUFLEN]; + + if (HAS_ENTRY_ITEM(itemLow)) { + if (printDataType(vars, + field, sizeof(field), + itemLow) < 0) + return -1; + virFirewallCmdAddArg(fw, fwrule, argument); + if (ENTRY_WANT_NEG_SIGN(itemLow)) + virFirewallCmdAddArg(fw, fwrule, "!="); + if (HAS_ENTRY_ITEM(itemHigh)) { + if (printDataType(vars, + fieldalt, sizeof(fieldalt), + itemHigh) < 0) + return -1; + virFirewallCmdAddArgFormat(fw, fwrule, + 
"%s%s%s", field, seperator, fieldalt); + } else { + virFirewallCmdAddArg(fw, fwrule, field); + } + } + + return 0; +} + +static int +nftablesHandlePortData(virFirewall *fw, + virFirewallCmd *fwrule, + virNWFilterVarCombIter *vars, + const char *protocol, + portDataDef *portData, + bool reverseRule) +{ + char dport[VIR_INT64_STR_BUFLEN]; + char sport[VIR_INT64_STR_BUFLEN]; + + g_snprintf(dport, sizeof(dport), reverseRule ? "%s sport" : "%s dport", + protocol); + g_snprintf(sport, sizeof(sport), reverseRule ? "%s dport": "%s sport", + protocol); + + if (insertRuleArg2Param(fw, fwrule, vars, + &portData->dataDstPortStart, + &portData->dataDstPortEnd, dport, "-") < 0) + return -1; + if (insertRuleArg2Param(fw, fwrule, vars, + &portData->dataSrcPortStart, + &portData->dataSrcPortEnd, sport, "-") < 0) + return -1; + + return 0; +} + +static int +nftablesHandleMacAddr(virFirewall *fw, + virFirewallCmd *fwrule, + virNWFilterVarCombIter *vars, + nwItemDesc *macaddr, + const char *argument) +{ + char macstr[VIR_MAC_STRING_BUFLEN]; + + if (HAS_ENTRY_ITEM(macaddr)) { + if (printDataType(vars, + macstr, sizeof(macstr), + macaddr) < 0) + return -1; + + virFirewallCmdAddArg(fw, fwrule, argument); + if (ENTRY_WANT_NEG_SIGN(macaddr)) + virFirewallCmdAddArg(fw, fwrule, "!="); + virFirewallCmdAddArg(fw, fwrule, macstr); + } + + return 0; +} + +static int +nftablesHandleSrcMacAddr(virFirewall *fw, + virFirewallCmd *fwrule, + virNWFilterVarCombIter *vars, + nwItemDesc *srcMacAddr) +{ + return nftablesHandleMacAddr(fw, fwrule, vars, srcMacAddr, "ether saddr"); +} + +static void +printStateMatchFlags(int32_t flags, char **bufptr) +{ + g_auto(virBuffer) buf = VIR_BUFFER_INITIALIZER; + virNWFilterPrintStateMatchFlags(&buf, "", flags, false); + + /* str to lower needed as nft doesn't accept upper case states */ + g_string_ascii_down(buf.str); + + *bufptr = virBufferContentAndReset(&buf); +} + +static bool +nftablesRuleNeedsConntrack(virNWFilterRuleDef *rule) +{ + /* ip only */ + if 
(virNWFilterRuleIsProtocolEthernet(rule)) { + return false; + } + + /* Skip conntrack if statematch=false flag has been set */ + if (rule->flags & RULE_FLAG_NO_STATEMATCH) { + return false; + } + + /* If no state flags are set and rule->action is not accept, + * we should skip conntrack */ + if (!(rule->flags & IPTABLES_STATE_FLAGS) && + rule->action != VIR_NWFILTER_RULE_ACTION_ACCEPT) { + return false; + } + + return true; +} + +static bool +nftablesRuleNeedsConnLimit(ipHdrDataDef *ipHdr, + bool directionIn) +{ + return HAS_ENTRY_ITEM(&ipHdr->dataConnlimitAbove) && !directionIn; +} + +static char * +nftablesPrintTCPFlags(uint8_t flags) +{ + g_auto(virBuffer) buf = VIR_BUFFER_INITIALIZER; + g_autofree char *flagsstr = NULL; + + if (flags == 0) { + virBufferAddLit(&buf, "0"); + } else if (flags == 0x3f) { + virBufferAddLit(&buf, "*"); + } else { + flagsstr = virNWFilterPrintTCPFlags(flags); + virBufferAdd(&buf, flagsstr, -1); + g_string_ascii_down(buf.str); + } + + return virBufferContentAndReset(&buf); +} + +/* + * nftablesHandleOtherRule: + * @fw: the firewall ruleset to add to + * @fwrule: the firewall command to add arguments to + * @vars : A map containing the variables to resolve + * @rule: The rule of the filter to convert + * @directionIn: direction of the rule, true for in false for out + * directionIn is needed for additional conntrack logic + * @reverseRule: Whether to reverse src and dst attributes + * ethernet reverse flag is set conntrack requires a reverse + * rule on the opposite chain + * + * Set arguments on fwrule based on given struct *rule + * + */ +static int +nftablesHandleOtherRule(virFirewall *fw, + virFirewallCmd *fwrule, + virNWFilterVarCombIter *vars, + virNWFilterRuleDef *rule, + bool directionIn, + bool reverseRule) +{ + char number[VIR_INT64_STR_BUFLEN]; + bool hasICMPType = false; + bool skipDirection = false; + g_autofree char *matchState = NULL; + ipHdrDataDef *ipHdr = NULL; + const char *protocol = 
nftablesGetProtocolType(rule->prtclType); + + virFirewallCmdAddArgList(fw, fwrule, "ether", "type", NULL); + if (virNWFilterRuleIsProtocolIPv6(rule) && + !virNWFilterRuleIsProtocolIPv4(rule)) { + virFirewallCmdAddArg(fw, fwrule, "ip6"); + } else if (virNWFilterRuleIsProtocolIPv4(rule) && + !virNWFilterRuleIsProtocolIPv6(rule)) { + virFirewallCmdAddArg(fw, fwrule, "ip"); + } + + switch ((int)rule->prtclType) { + case VIR_NWFILTER_RULE_PROTOCOL_TCP: + case VIR_NWFILTER_RULE_PROTOCOL_TCPoIPV6: + virFirewallCmdAddArgList(fw, fwrule, "meta", "l4proto", "tcp", NULL); + ipHdr = &rule->p.tcpHdrFilter.ipHdr; + + if (nftablesHandleSrcMacAddr(fw, fwrule, vars, + &rule->p.tcpHdrFilter.dataSrcMACAddr) < 0) + return -1; + if (nftablesHandleIPHdr(fw, fwrule, vars, ipHdr, reverseRule) < 0) + return -1; + + if (HAS_ENTRY_ITEM(&rule->p.tcpHdrFilter.dataTCPFlags)) { + g_autofree char *mask = NULL; + g_autofree char *flags = NULL; + + /* flags & syn == syn */ + virFirewallCmdAddArgList(fw, fwrule, "tcp", "flags", "&", NULL); + + if (!(mask = nftablesPrintTCPFlags( + rule->p.tcpHdrFilter.dataTCPFlags.u.tcpFlags.mask))) + return -1; + virFirewallCmdAddArgList(fw, fwrule, mask, ENTRY_WANT_NEG_SIGN( + &rule->p.tcpHdrFilter.dataTCPFlags) + ? 
"!=" : "==", NULL); + + if (!(flags = nftablesPrintTCPFlags( + rule->p.tcpHdrFilter.dataTCPFlags.u.tcpFlags.flags))) + return -1; + virFirewallCmdAddArgList(fw, fwrule, "{", flags, "}", NULL); + } + + if (HAS_ENTRY_ITEM(&rule->p.tcpHdrFilter.dataTCPOption)) { + if (printDataType(vars, number, sizeof(number), + &rule->p.tcpHdrFilter.dataTCPOption) < 0) + return -1; + + virFirewallCmdAddArgList(fw, fwrule, "tcp", "option", NULL); + if (ENTRY_WANT_NEG_SIGN(&rule->p.tcpHdrFilter.dataTCPOption)) + virFirewallCmdAddArg(fw, fwrule, "!"); + virFirewallCmdAddArg(fw, fwrule, number); + } + + if (nftablesHandlePortData(fw, fwrule, vars, protocol, + &rule->p.tcpHdrFilter.portData, reverseRule) < 0) + return -1; + + break; + case VIR_NWFILTER_RULE_PROTOCOL_UDP: + case VIR_NWFILTER_RULE_PROTOCOL_UDPoIPV6: + virFirewallCmdAddArgList(fw, fwrule, "meta", "l4proto", "udp", NULL); + ipHdr = &rule->p.udpHdrFilter.ipHdr; + + if (nftablesHandleSrcMacAddr(fw, fwrule, vars, + &rule->p.udpHdrFilter.dataSrcMACAddr) < 0) + return -1; + if (nftablesHandleIPHdr(fw, fwrule, vars, ipHdr, reverseRule) < 0) + return -1; + if (nftablesHandlePortData(fw, fwrule, vars, protocol, + &rule->p.udpHdrFilter.portData, reverseRule) < 0) + return -1; + break; + case VIR_NWFILTER_RULE_PROTOCOL_UDPLITE: + case VIR_NWFILTER_RULE_PROTOCOL_UDPLITEoIPV6: + virFirewallCmdAddArgList(fw, fwrule, "meta", "l4proto", "udplite", NULL); + ipHdr = &rule->p.udpliteHdrFilter.ipHdr; + + if (nftablesHandleSrcMacAddr(fw, fwrule, vars, + &rule->p.udpliteHdrFilter.dataSrcMACAddr) < 0) + return -1; + if (nftablesHandleIPHdr(fw, fwrule, vars, ipHdr, reverseRule) < 0) + return -1; + break; + case VIR_NWFILTER_RULE_PROTOCOL_ESP: + case VIR_NWFILTER_RULE_PROTOCOL_ESPoIPV6: + virFirewallCmdAddArgList(fw, fwrule, "meta", "l4proto", "esp", NULL); + ipHdr = &rule->p.espHdrFilter.ipHdr; + + if (nftablesHandleSrcMacAddr(fw, fwrule, vars, + &rule->p.espHdrFilter.dataSrcMACAddr) < 0) + return -1; + if (nftablesHandleIPHdr(fw, fwrule, vars, 
ipHdr, reverseRule) < 0) + return -1; + break; + case VIR_NWFILTER_RULE_PROTOCOL_AH: + case VIR_NWFILTER_RULE_PROTOCOL_AHoIPV6: + virFirewallCmdAddArgList(fw, fwrule, "meta", "l4proto", "ah", NULL); + ipHdr = &rule->p.ahHdrFilter.ipHdr; + + if (nftablesHandleSrcMacAddr(fw, fwrule, vars, + &rule->p.ahHdrFilter.dataSrcMACAddr) < 0) + return -1; + if (nftablesHandleIPHdr(fw, fwrule, vars, ipHdr, reverseRule) < 0) + return -1; + break; + case VIR_NWFILTER_RULE_PROTOCOL_SCTP: + case VIR_NWFILTER_RULE_PROTOCOL_SCTPoIPV6: + virFirewallCmdAddArgList(fw, fwrule, "meta", "l4proto", "sctp", NULL); + ipHdr = &rule->p.sctpHdrFilter.ipHdr; + + if (nftablesHandleSrcMacAddr(fw, fwrule, vars, + &rule->p.sctpHdrFilter.dataSrcMACAddr) < 0) + return -1; + + if (nftablesHandleIPHdr(fw, fwrule, vars, ipHdr, reverseRule) < 0) + return -1; + + if (nftablesHandlePortData(fw, fwrule, vars, protocol, + &rule->p.sctpHdrFilter.portData, reverseRule) < 0) + return -1; + break; + case VIR_NWFILTER_RULE_PROTOCOL_ICMP: + case VIR_NWFILTER_RULE_PROTOCOL_ICMPV6: + if (rule->prtclType == VIR_NWFILTER_RULE_PROTOCOL_ICMPV6) { + virFirewallCmdAddArgList(fw, fwrule, "ip6", "nexthdr", NULL); + } else { + virFirewallCmdAddArgList(fw, fwrule, "ip", "protocol", NULL); + } + virFirewallCmdAddArg(fw, fwrule, protocol); + + ipHdr = &rule->p.icmpHdrFilter.ipHdr; + hasICMPType = true; + + if (nftablesHandleSrcMacAddr(fw, fwrule, vars, + &rule->p.icmpHdrFilter.dataSrcMACAddr) < 0) + return -1; + + if (nftablesHandleIPHdr(fw, fwrule, vars, ipHdr, reverseRule) < 0) + return -1; + + if (HAS_ENTRY_ITEM(&rule->p.icmpHdrFilter.dataICMPType)) { + virFirewallCmdAddArgList(fw, fwrule, protocol, "type", NULL); + + if (printDataType(vars, + number, sizeof(number), + &rule->p.icmpHdrFilter.dataICMPType) < 0) + return -1; + + if (ENTRY_WANT_NEG_SIGN(&rule->p.icmpHdrFilter.dataICMPType)) + virFirewallCmdAddArg(fw, fwrule, "!="); + + virFirewallCmdAddArg(fw, fwrule, number); + + if 
(HAS_ENTRY_ITEM(&rule->p.icmpHdrFilter.dataICMPCode)) { + virFirewallCmdAddArgList(fw, fwrule, protocol, "code", NULL); + + if (printDataType(vars, + number, sizeof(number), + &rule->p.icmpHdrFilter.dataICMPCode) < 0) + return -1; + + if (ENTRY_WANT_NEG_SIGN(&rule->p.icmpHdrFilter.dataICMPCode)) + virFirewallCmdAddArg(fw, fwrule, "!="); + + virFirewallCmdAddArg(fw, fwrule, number); + } + } + break; + case VIR_NWFILTER_RULE_PROTOCOL_IGMP: + virFirewallCmdAddArgList(fw, fwrule, "meta", "l4proto", "igmp", NULL); + ipHdr = &rule->p.igmpHdrFilter.ipHdr; + + if (nftablesHandleSrcMacAddr(fw, fwrule, vars, + &rule->p.igmpHdrFilter.dataSrcMACAddr) < 0) + return -1; + + if (nftablesHandleIPHdr(fw, fwrule, vars, ipHdr, reverseRule) < 0) + return -1; + break; + case VIR_NWFILTER_RULE_PROTOCOL_ALL: + case VIR_NWFILTER_RULE_PROTOCOL_ALLoIPV6: + ipHdr = &rule->p.allHdrFilter.ipHdr; + if (nftablesHandleSrcMacAddr(fw, fwrule, vars, + &rule->p.allHdrFilter.dataSrcMACAddr) < 0) + return -1; + + if (nftablesHandleIPHdr(fw, fwrule, vars, ipHdr, reverseRule) < 0) + return -1; + break; + default: + virReportError(VIR_ERR_INTERNAL_ERROR, + _("Unexpected protocol %1$d"), + rule->prtclType); + return -1; + } + + /* no support for ipset */ + if (HAS_ENTRY_ITEM(&ipHdr->dataIPSet) && + HAS_ENTRY_ITEM(&ipHdr->dataIPSetFlags)) { + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("Rule contains unsupported ipset flags")); + } + + /* apply conn limit only to outgoing connections */ + if (nftablesRuleNeedsConnLimit(ipHdr, directionIn)) { + if (printDataType(vars, + number, sizeof(number), + &ipHdr->dataConnlimitAbove) < 0) + return -1; + + /* place connlimit after potential state logic + since this is the most useful order */ + virFirewallCmdAddArgList(fw, fwrule, "ct", "count", "over", NULL); + if (ENTRY_WANT_NEG_SIGN(&ipHdr->dataConnlimitAbove)) + virFirewallCmdAddArg(fw, fwrule, "!="); + virFirewallCmdAddArgList(fw, fwrule, number, NULL); + } + + if (nftablesRuleNeedsConntrack(rule)) { + /* we 
skip direction when ct count is set or type is icmp */ + skipDirection = nftablesRuleNeedsConnLimit(ipHdr, directionIn) || + hasICMPType; + + /* no direction */ + if (!skipDirection) + /* reverse rules are replies, + * otherwise it is the originating direction */ + virFirewallCmdAddArgList(fw, fwrule, "ct", "direction", + (reverseRule ? "reply" : "original"), + NULL); + + if (rule->flags & IPTABLES_STATE_FLAGS && + !(rule->flags & RULE_FLAG_STATE_NONE)) { + printStateMatchFlags(rule->flags, &matchState); + } else { + /* static state match is needed because when no state flags + * have been set but statematch is enabled we need a default */ + /* reverse rules are established connections */ + matchState = g_strdup(reverseRule ? + "established" : + "new,established"); + } + virFirewallCmdAddArgList(fw, fwrule, "ct", "state", matchState, NULL); + } + + return 0; +} + +static int +insertRuleArgParam(virFirewall *fw, + virFirewallCmd *fwrule, + virNWFilterVarCombIter *vars, + nwItemDesc *item, + const char *argument) +{ + char field[VIR_INT64_STR_BUFLEN]; + + if (HAS_ENTRY_ITEM(item)) { + if (printDataType(vars, + field, sizeof(field), + item) < 0) + return -1; + virFirewallCmdAddArg(fw, fwrule, argument); + if (ENTRY_WANT_NEG_SIGN(item)) + virFirewallCmdAddArg(fw, fwrule, "!="); + + virFirewallCmdAddArg(fw, fwrule, field); + } + + return 0; +} + +static int +insertRuleArgParamHex(virFirewall *fw, + virFirewallCmd *fwrule, + virNWFilterVarCombIter *vars, + nwItemDesc *item, + const char *argument) +{ + char field[VIR_INT64_STR_BUFLEN]; + + if (HAS_ENTRY_ITEM(item)) { + if (printDataTypeAsHex(vars, + field, sizeof(field), + item) < 0) + return -1; + virFirewallCmdAddArg(fw, fwrule, argument); + if (ENTRY_WANT_NEG_SIGN(item)) + virFirewallCmdAddArg(fw, fwrule, "!="); + + virFirewallCmdAddArg(fw, fwrule, field); + } + + return 0; +} + +/* + * nftablesHandleEthernetRule: + * @fw: the firewall ruleset to add to + * @vars : A map containing the variables to resolve + * @rule: 
The rule of the filter to convert
+ * @reverseRule : Whether to reverse src and dst attributes
+ *                ethernet reverse flag is set when direction='inout' is set
+ *
+ * Set arguments on fwrule based on given struct *rule.
+ *
+ * Each protocol case appends its matches in a fixed order; the order of
+ * the appended arguments is the order in which they reach nft.
+ *
+ * Returns 0 on success, -1 if a value could not be formatted or the rule
+ * uses a feature this driver rejects (STP, gratuitous ARP).
+ */
+static int
+nftablesHandleEthernetRule(virFirewall *fw,
+                           virFirewallCmd *fwrule,
+                           virNWFilterVarCombIter *vars,
+                           virNWFilterRuleDef *rule,
+                           bool reverseRule)
+{
+    char number[VIR_INT64_STR_BUFLEN];
+    char ipaddr[INET_ADDRSTRLEN];
+    char ipmask[INET_ADDRSTRLEN];
+    char ipv6addr[INET6_ADDRSTRLEN];
+    bool hasMask = false;
+    /* a reversed rule matches the opposite end of the conversation */
+    const char *saddr = reverseRule ? "daddr" : "saddr";
+    const char *daddr = reverseRule ? "saddr" : "daddr";
+
+    switch ((int)rule->prtclType) {
+    case VIR_NWFILTER_RULE_PROTOCOL_MAC:
+        if (nftablesHandleEthHdr(fw, fwrule,
+                                 vars,
+                                 &rule->p.ethHdrFilter.ethHdr, reverseRule) < 0)
+            return -1;
+
+        if (insertRuleArgParamHex(fw, fwrule, vars,
+                                  &rule->p.ethHdrFilter.dataProtocolID,
+                                  "ether type") < 0)
+            return -1;
+        break;
+    case VIR_NWFILTER_RULE_PROTOCOL_IP:
+        /* NOTE(review): the negation of the IP protocol item is applied to
+         * the "ether type ip" match here and again to "ip protocol" below;
+         * confirm that negating both is intended. */
+        virFirewallCmdAddArgList(fw, fwrule, "ether", "type", NULL);
+        if (ENTRY_WANT_NEG_SIGN(&rule->p.ipHdrFilter.ipHdr.dataProtocolID))
+            virFirewallCmdAddArg(fw, fwrule, "!=");
+        virFirewallCmdAddArg(fw, fwrule, "ip");
+
+        if (nftablesHandleEthHdr(fw, fwrule,
+                                 vars,
+                                 &rule->p.ipHdrFilter.ethHdr, reverseRule) < 0)
+            return -1;
+
+        if (HAS_ENTRY_ITEM(&rule->p.ipHdrFilter.ipHdr.dataSrcIPAddr)) {
+            if (printDataType(vars,
+                              ipaddr, sizeof(ipaddr),
+                              &rule->p.ipHdrFilter.ipHdr.dataSrcIPAddr) < 0)
+                return -1;
+
+            virFirewallCmdAddArgList(fw, fwrule, "ip", saddr, NULL);
+            if (ENTRY_WANT_NEG_SIGN(&rule->p.ipHdrFilter.ipHdr.dataSrcIPAddr))
+                virFirewallCmdAddArg(fw, fwrule, "!=");
+
+            if (HAS_ENTRY_ITEM(&rule->p.ipHdrFilter.ipHdr.dataSrcIPMask)) {
+                if (printDataType(vars,
+                                  number, sizeof(number),
+                                  &rule->p.ipHdrFilter.ipHdr.dataSrcIPMask) < 0)
+                    return -1;
+                virFirewallCmdAddArgFormat(fw, fwrule,
+                                           "%s/%s", ipaddr, number);
+            } else {
+                virFirewallCmdAddArg(fw, fwrule, ipaddr);
+            }
+        }
+
+        if (HAS_ENTRY_ITEM(&rule->p.ipHdrFilter.ipHdr.dataDstIPAddr)) {
+            if (printDataType(vars,
+                              ipaddr, sizeof(ipaddr),
+                              &rule->p.ipHdrFilter.ipHdr.dataDstIPAddr) < 0)
+                return -1;
+
+            virFirewallCmdAddArgList(fw, fwrule, "ip", daddr, NULL);
+            if (ENTRY_WANT_NEG_SIGN(&rule->p.ipHdrFilter.ipHdr.dataDstIPAddr))
+                virFirewallCmdAddArg(fw, fwrule, "!=");
+
+            if (HAS_ENTRY_ITEM(&rule->p.ipHdrFilter.ipHdr.dataDstIPMask)) {
+                if (printDataType(vars,
+                                  number, sizeof(number),
+                                  &rule->p.ipHdrFilter.ipHdr.dataDstIPMask) < 0)
+                    return -1;
+                virFirewallCmdAddArgFormat(fw, fwrule,
+                                           "%s/%s", ipaddr, number);
+            } else {
+                virFirewallCmdAddArg(fw, fwrule, ipaddr);
+            }
+        }
+
+        if (insertRuleArgParam(fw, fwrule, vars,
+                               &rule->p.ipHdrFilter.ipHdr.dataProtocolID,
+                               "ip protocol") < 0)
+            return -1;
+        if (insertRuleArg2Param(fw, fwrule, vars,
+                                &rule->p.ipHdrFilter.portData.dataSrcPortStart,
+                                &rule->p.ipHdrFilter.portData.dataSrcPortEnd,
+                                reverseRule ? "th dport" : "th sport", "-") < 0)
+            return -1;
+        if (insertRuleArg2Param(fw, fwrule, vars,
+                                &rule->p.ipHdrFilter.portData.dataDstPortStart,
+                                &rule->p.ipHdrFilter.portData.dataDstPortEnd,
+                                reverseRule ? "th sport" : "th dport", "-") < 0)
+            return -1;
+        if (insertRuleArgParamHex(fw, fwrule, vars,
+                                  &rule->p.ipHdrFilter.ipHdr.dataDSCP,
+                                  "ip dscp") < 0)
+            return -1;
+        break;
+    case VIR_NWFILTER_RULE_PROTOCOL_ARP:
+    case VIR_NWFILTER_RULE_PROTOCOL_RARP:
+        if (nftablesHandleEthHdr(fw, fwrule,
+                                 vars,
+                                 &rule->p.arpHdrFilter.ethHdr, reverseRule) < 0)
+            return -1;
+
+        virFirewallCmdAddArgList(fw, fwrule, "ether", "type", NULL);
+        virFirewallCmdAddArgFormat(fw, fwrule, "0x%x",
+                                   (rule->prtclType == VIR_NWFILTER_RULE_PROTOCOL_ARP)
+                                   ? l3_protocols[L3_PROTO_ARP_IDX].attr
+                                   : l3_protocols[L3_PROTO_RARP_IDX].attr);
+
+        if (insertRuleArgParam(fw, fwrule, vars,
+                               &rule->p.arpHdrFilter.dataHWType,
+                               "arp htype") < 0)
+            return -1;
+        if (insertRuleArgParam(fw, fwrule, vars,
+                               &rule->p.arpHdrFilter.dataOpcode,
+                               "arp operation") < 0)
+            return -1;
+        if (insertRuleArgParamHex(fw, fwrule, vars,
+                                  &rule->p.arpHdrFilter.dataProtocolType,
+                                  "arp ptype") < 0)
+            return -1;
+
+        if (HAS_ENTRY_ITEM(&rule->p.arpHdrFilter.dataARPSrcIPAddr)) {
+            if (printDataType(vars,
+                              ipaddr, sizeof(ipaddr),
+                              &rule->p.arpHdrFilter.dataARPSrcIPAddr) < 0)
+                return -1;
+
+            /* the mask flag is per address; start from "no mask" */
+            hasMask = false;
+            if (HAS_ENTRY_ITEM(&rule->p.arpHdrFilter.dataARPSrcIPMask)) {
+                if (printDataType(vars,
+                                  ipmask, sizeof(ipmask),
+                                  &rule->p.arpHdrFilter.dataARPSrcIPMask) < 0)
+                    return -1;
+                hasMask = true;
+            }
+
+            virFirewallCmdAddArgList(fw, fwrule, "arp", saddr, "ip", NULL);
+            if (ENTRY_WANT_NEG_SIGN(&rule->p.arpHdrFilter.dataARPSrcIPAddr))
+                virFirewallCmdAddArg(fw, fwrule, "!=");
+            virFirewallCmdAddArgFormat(fw, fwrule,
+                                       "%s/%s", ipaddr, hasMask ? ipmask : "32");
+        }
+
+        if (HAS_ENTRY_ITEM(&rule->p.arpHdrFilter.dataARPDstIPAddr)) {
+            if (printDataType(vars,
+                              ipaddr, sizeof(ipaddr),
+                              &rule->p.arpHdrFilter.dataARPDstIPAddr) < 0)
+                return -1;
+
+            /* reset: without this a mask given only for the source address
+             * would leak into the destination match */
+            hasMask = false;
+            if (HAS_ENTRY_ITEM(&rule->p.arpHdrFilter.dataARPDstIPMask)) {
+                if (printDataType(vars,
+                                  ipmask, sizeof(ipmask),
+                                  &rule->p.arpHdrFilter.dataARPDstIPMask) < 0)
+                    return -1;
+                hasMask = true;
+            }
+
+            virFirewallCmdAddArgList(fw, fwrule, "arp", daddr, "ip", NULL);
+            if (ENTRY_WANT_NEG_SIGN(&rule->p.arpHdrFilter.dataARPDstIPAddr))
+                virFirewallCmdAddArg(fw, fwrule, "!=");
+            virFirewallCmdAddArgFormat(fw, fwrule,
+                                       "%s/%s", ipaddr, hasMask ? ipmask : "32");
+        }
+
+        /* NOTE(review): the ARP source/destination MAC items are matched
+         * with "ether saddr"/"ether daddr" (Ethernet header) while the IP
+         * items above use "arp saddr ip"/"arp daddr ip" (ARP payload);
+         * confirm whether "arp saddr ether"/"arp daddr ether" was meant. */
+        if (nftablesHandleMacAddr(fw, fwrule, vars,
+                                  &rule->p.arpHdrFilter.dataARPSrcMACAddr,
+                                  reverseRule ? "ether daddr": "ether saddr") < 0)
+            return -1;
+        if (nftablesHandleMacAddr(fw, fwrule, vars,
+                                  &rule->p.arpHdrFilter.dataARPDstMACAddr,
+                                  reverseRule ? "ether saddr": "ether daddr") < 0)
+            return -1;
+
+        if (HAS_ENTRY_ITEM(&rule->p.arpHdrFilter.dataGratuitousARP) &&
+            rule->p.arpHdrFilter.dataGratuitousARP.u.boolean) {
+            virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+                           _("GARP filtering in nftables is not supported"));
+            return -1;
+        }
+        break;
+    case VIR_NWFILTER_RULE_PROTOCOL_IPV6:
+        if (nftablesHandleEthHdr(fw, fwrule,
+                                 vars,
+                                 &rule->p.ipv6HdrFilter.ethHdr, reverseRule) < 0)
+            return -1;
+
+        virFirewallCmdAddArgList(fw, fwrule, "ether", "type", "ip6", NULL);
+
+        if (HAS_ENTRY_ITEM(&rule->p.ipv6HdrFilter.ipHdr.dataSrcIPAddr)) {
+            if (printDataType(vars,
+                              ipv6addr, sizeof(ipv6addr),
+                              &rule->p.ipv6HdrFilter.ipHdr.dataSrcIPAddr) < 0)
+                return -1;
+
+            virFirewallCmdAddArgList(fw, fwrule, "ip6", saddr, NULL);
+            if (ENTRY_WANT_NEG_SIGN(&rule->p.ipv6HdrFilter.ipHdr.dataSrcIPAddr))
+                virFirewallCmdAddArg(fw, fwrule, "!=");
+
+            if (HAS_ENTRY_ITEM(&rule->p.ipv6HdrFilter.ipHdr.dataSrcIPMask)) {
+                if (printDataType(vars,
+                                  number, sizeof(number),
+                                  &rule->p.ipv6HdrFilter.ipHdr.dataSrcIPMask) < 0)
+                    return -1;
+                virFirewallCmdAddArgFormat(fw, fwrule,
+                                           "%s/%s", ipv6addr, number);
+            } else {
+                virFirewallCmdAddArg(fw, fwrule, ipv6addr);
+            }
+        }
+
+        if (HAS_ENTRY_ITEM(&rule->p.ipv6HdrFilter.ipHdr.dataDstIPAddr)) {
+
+            if (printDataType(vars,
+                              ipv6addr, sizeof(ipv6addr),
+                              &rule->p.ipv6HdrFilter.ipHdr.dataDstIPAddr) < 0)
+                return -1;
+
+            virFirewallCmdAddArgList(fw, fwrule, "ip6", daddr, NULL);
+            if (ENTRY_WANT_NEG_SIGN(&rule->p.ipv6HdrFilter.ipHdr.dataDstIPAddr))
+                virFirewallCmdAddArg(fw, fwrule, "!=");
+
+            if (HAS_ENTRY_ITEM(&rule->p.ipv6HdrFilter.ipHdr.dataDstIPMask)) {
+                if (printDataType(vars,
+                                  number, sizeof(number),
+                                  &rule->p.ipv6HdrFilter.ipHdr.dataDstIPMask) < 0)
+                    return -1;
+                virFirewallCmdAddArgFormat(fw, fwrule,
+                                           "%s/%s", ipv6addr, number);
+            } else {
+                virFirewallCmdAddArg(fw, fwrule, ipv6addr);
+            }
+        }
+
+        if (insertRuleArgParam(fw, fwrule, vars,
+                               &rule->p.ipv6HdrFilter.ipHdr.dataProtocolID,
+                               "ip6 nexthdr") < 0)
+            return -1;
+        if (insertRuleArg2Param(fw, fwrule, vars,
+                                &rule->p.ipv6HdrFilter.portData.dataSrcPortStart,
+                                &rule->p.ipv6HdrFilter.portData.dataSrcPortEnd,
+                                reverseRule ? "th dport" : "th sport", "-") < 0)
+            return -1;
+        if (insertRuleArg2Param(fw, fwrule, vars,
+                                &rule->p.ipv6HdrFilter.portData.dataDstPortStart,
+                                &rule->p.ipv6HdrFilter.portData.dataDstPortEnd,
+                                reverseRule ? "th sport" : "th dport", "-") < 0)
+            return -1;
+        if (HAS_ENTRY_ITEM(&rule->p.ipv6HdrFilter.dataICMPTypeStart) ||
+            HAS_ENTRY_ITEM(&rule->p.ipv6HdrFilter.dataICMPCodeStart)) {
+
+            if (insertRuleArgParam(fw, fwrule, vars,
+                                   &rule->p.ipv6HdrFilter.dataICMPTypeStart,
+                                   "icmpv6 type") < 0)
+                return -1;
+            if (insertRuleArgParam(fw, fwrule, vars,
+                                   &rule->p.ipv6HdrFilter.dataICMPCodeStart,
+                                   "icmpv6 code") < 0)
+                return -1;
+        }
+        break;
+    case VIR_NWFILTER_RULE_PROTOCOL_VLAN:
+        if (nftablesHandleEthHdr(fw, fwrule,
+                                 vars,
+                                 &rule->p.vlanHdrFilter.ethHdr, reverseRule) < 0)
+            return -1;
+
+        virFirewallCmdAddArgList(fw, fwrule, "ether", "type", "0x8100", NULL);
+
+        if (insertRuleArgParam(fw, fwrule, vars,
+                               &rule->p.vlanHdrFilter.dataVlanID,
+                               "vlan id") < 0)
+            return -1;
+        if (insertRuleArgParam(fw, fwrule, vars,
+                               &rule->p.vlanHdrFilter.dataVlanEncap,
+                               "vlan type") < 0)
+            return -1;
+        break;
+    case VIR_NWFILTER_RULE_PROTOCOL_STP:
+        virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+                       _("STP filtering in nftables is not supported"));
+        return -1;
+    case VIR_NWFILTER_RULE_PROTOCOL_NONE:
+        break;
+    default:
+        virReportError(VIR_ERR_INTERNAL_ERROR,
+                       _("Unexpected rule protocol '%1$d', priority '%2$d'"),
+                       rule->prtclType,
+                       rule->priority);
+        return -1;
+    }
+
+    return 0;
+}
+
+/*
+ * nftablesGetNFTable:
+ *
+ * @rule: The rule of the filter
+ *
+ * We have a separate table, due to eb/iptables compatibility.
+ * Ideally we allow users to have only 1 table in which all rules are placed.
+ * We'll need to turn that into a nwfilter feature.
+ */
+static const char
*nftablesGetNFTable(virNWFilterRuleDef *rule)
+{
+    /* ethernet-level rules and all other rules live in different tables */
+    return virNWFilterRuleIsProtocolEthernet(rule) ?
+        NF_ETHERNET_TABLE :
+        NF_OTHER_TABLE;
+}
+
+/*
+ * nftablesAddCmdUserComment:
+ *
+ * Append the quoted nft comment payload for @rule: the rule priority plus
+ * the user supplied comment. Double quotes inside the user comment are
+ * replaced by single quotes so they cannot terminate the quoted payload.
+ */
+static void
+nftablesAddCmdUserComment(virFirewall *fw,
+                          virFirewallCmd *fwrule,
+                          virNWFilterRuleDef *rule)
+{
+    g_autofree char *comment = NULL;
+    comment = virStringReplace(
+        rule->p.allHdrFilter.ipHdr.dataComment.u.string,
+        "\"", "'");
+
+    virFirewallCmdAddArgFormat(fw, fwrule,
+                               "\"priority=%d,usercomment=%s\"",
+                               rule->priority, comment);
+}
+
+/*
+ * nftablesCreateRuleInstance:
+ * @fw: the firewall ruleset instance
+ * @layer: the firewall layer
+ * @chainPrefix: The chain suffix of the rule ('root', 'mac', 'ipv4', ...);
+ *               it becomes the middle part of the chain name
+ * @rule: The rule of the filter to convert
+ * @ifname : The name of the interface to apply the rule to
+ * @vars : A map containing the variables to resolve
+ * @directionIn : true to add the rule to the '-in' chain, false for '-out'
+ * @reverseRule : Whether to reverse src and dst attributes
+ *
+ * Convert a single rule into one "add rule" command on @fw. The command
+ * targets chain "n-<ifname>-<in|out>" for root rules and
+ * "n-<ifname>-<chainPrefix>-<in|out>" otherwise, in the table returned by
+ * nftablesGetNFTable().
+ *
+ * Returns 0 in case of success, -1 otherwise; on failure the partially
+ * built command is removed from @fw again.
+ */
+static int
+nftablesCreateRuleInstance(virFirewall *fw,
+                           virFirewallLayer layer,
+                           const char *chainPrefix,
+                           virNWFilterRuleDef *rule,
+                           const char *ifname,
+                           virNWFilterVarCombIter *vars,
+                           bool directionIn,
+                           bool reverseRule)
+{
+    int ret = -1;
+    char chain[MAX_NF_CHAINNAME_LENGTH];
+    virFirewallCmd *fwrule = NULL;
+    const char *root = virNWFilterChainSuffixTypeToString(
+        VIR_NWFILTER_CHAINSUFFIX_ROOT);
+    const char *nftablesRootTable = nftablesGetNFTable(rule);
+
+    /* apply root rules directly on the root chain, for example:
+     * n-vnet1-in n-vnet1-out */
+    if (STREQ(chainPrefix, root)) {
+        g_snprintf(chain, sizeof(chain), "n-%s-%s", ifname,
+                   directionIn ? "in" : "out");
+    } else {
+        g_snprintf(chain, sizeof(chain), "n-%s-%s-%s", ifname, chainPrefix,
+                   directionIn ? "in" : "out");
+    }
+
+    fwrule = virFirewallAddCmd(fw, layer,
+                               "add", "rule", "bridge",
+                               nftablesRootTable, chain, NULL);
+
+    /* append the protocol specific matches */
+    if (virNWFilterRuleIsProtocolEthernet(rule)) {
+        if (nftablesHandleEthernetRule(fw, fwrule, vars, rule, reverseRule) < 0)
+            goto cleanup;
+    } else {
+        if (nftablesHandleOtherRule(fw, fwrule, vars, rule,
+                                    directionIn, reverseRule) < 0)
+            goto cleanup;
+    }
+
+    if (NF_COUNTER)
+        virFirewallCmdAddArg(fw, fwrule, "counter");
+
+    /* specify the action for this rule */
+    nftablesAddCmdAction(fw, fwrule, rule->action);
+
+    /* process rule comment */
+    virFirewallCmdAddArg(fw, fwrule, "comment");
+
+    /* ethernet rules don't have the allHdrFilter */
+    if (HAS_ENTRY_ITEM(&rule->p.allHdrFilter.ipHdr.dataComment) &&
+        !virNWFilterRuleIsProtocolEthernet(rule)) {
+        nftablesAddCmdUserComment(fw, fwrule, rule);
+    } else {
+        virFirewallCmdAddArgFormat(fw, fwrule, "\"priority=%d\"", rule->priority);
+    }
+
+    ret = 0;
+
+ cleanup:
+    /* do not leave a half-built command behind */
+    if (ret == -1)
+        virFirewallRemoveCmd(fw, fwrule);
+
+    return ret;
+}
+
+/*
+ * nftablesRuleInstCommand:
+ *
+ * Instantiate @rule on @ifname once per combination of its variable
+ * values. Depending on the rule direction, one or two commands are added
+ * per combination (see the comments in the loop).
+ *
+ * Returns 0 on success, -1 on failure.
+ */
+static int
+nftablesRuleInstCommand(virFirewall *fw,
+                        virFirewallLayer layer,
+                        const char *ifname,
+                        virNWFilterRuleInst *rule)
+{
+    int ret = -1;
+    virNWFilterVarCombIter *vciter;
+    virNWFilterVarCombIter *tmp;
+    virNWFilterRuleDirectionType direction = rule->def->tt;
+
+    /* rule->vars holds all the variables names that this rule will access.
+     * iterate over all combinations of the variables' values and instantiate
+     * the filtering rule with each combination.
+     */
+    tmp = vciter = virNWFilterVarCombIterCreate(rule->vars,
+                                                rule->def->varAccess,
+                                                rule->def->nVarAccess);
+    if (!vciter)
+        return -1;
+
+    do {
+        bool reverseRule = false;
+
+        VIR_DEBUG("rule[chain='%s', dir='%d', prio='%d', action='%d', chainPrio='%d']",
+                  rule->chainSuffix,
+                  direction,
+                  rule->priority,
+                  rule->def->action,
+                  rule->chainPriority);
+
+        if (direction == VIR_NWFILTER_RULE_DIRECTION_INOUT) {
+            /* for direction inout we run the create instance twice,
+             * with directionIn set to true and false */
+
+            /* in */
+            if (nftablesCreateRuleInstance(fw, layer, rule->chainSuffix,
+                                           rule->def, ifname, tmp,
+                                           true, reverseRule) < 0)
+                goto cleanup;
+
+            /* for ethernet rules, to comply to what ebiptables did,
+             * we set reverseRule to true on direction inout */
+            reverseRule = virNWFilterRuleIsProtocolEthernet(rule->def);
+
+            /* out */
+            if (nftablesCreateRuleInstance(fw, layer, rule->chainSuffix,
+                                           rule->def, ifname, tmp,
+                                           false, reverseRule) < 0)
+                goto cleanup;
+        } else {
+            bool directionIn = direction == VIR_NWFILTER_RULE_DIRECTION_IN;
+            /* otherwise we provide directionIn */
+            if (nftablesCreateRuleInstance(fw, layer, rule->chainSuffix,
+                                           rule->def, ifname, tmp,
+                                           directionIn, reverseRule) < 0)
+                goto cleanup;
+
+            /* rules that do conntrack matching and have action accept need a
+             * reverse rule on the other chain to accept the reply direction
+             * so if we accept outbound we need an accept on the inbound for
+             * established connections */
+            if (nftablesRuleNeedsConntrack(rule->def) &&
+                rule->def->action == VIR_NWFILTER_RULE_ACTION_ACCEPT) {
+                reverseRule = true;
+                if (nftablesCreateRuleInstance(fw, layer, rule->chainSuffix,
+                                               rule->def, ifname, tmp,
+                                               !directionIn, reverseRule) < 0)
+                    goto cleanup;
+            }
+        }
+
+        tmp = virNWFilterVarCombIterNext(tmp);
+    } while (tmp != NULL);
+
+    ret = 0;
+ cleanup:
+    /* free via the original pointer; tmp is the advancing cursor */
+    virNWFilterVarCombIterFree(vciter);
+
+    return ret;
+}
+
+/*
+ * nftablesCreateSubChain:
+ * @fw: the firewall ruleset instance
+ * @layer: the firewall
layer
+ * @nftablesTableName: The nft table to create the chain and the jump in
+ * @chainPrefix: The prefix to put on the beginning of the name of the chain
+ * @protoidx: Protocol id for conditional jump
+ * @rootChain: The chain to define the jump on
+ * @chainPostfix: The postfix to put at the end of the name of the chain
+ *
+ * Creates the user defined chain "<chainPrefix>-<chainPostfix>" and adds a
+ * jump to it on @rootChain. With chainPrefix 'n-vnet1-mac' and chainPostfix
+ * 'in' this leads to:
+ * - n-vnet1-mac-in
+ *
+ * The jump is made conditional on "ether type <attr>" when @protoidx names
+ * an entry of l3_protocols with a non-zero attr; otherwise it is
+ * unconditional.
+ *
+ * Rules get defined on the corresponding chain based on the chosen direction,
+ * either in or out or both (in and out) when direction has been set to 'inout'
+ */
+static void
+nftablesCreateSubChain(virFirewall *fw,
+                       virFirewallLayer layer,
+                       const char *nftablesTableName,
+                       const char *chainPrefix,
+                       enum l3_proto_idx protoidx,
+                       const char *rootChain,
+                       const char *chainPostfix)
+{
+    char chain[MAX_NF_CHAINNAME_LENGTH];
+    virFirewallCmd *fwrule = NULL;
+    g_snprintf(chain, sizeof(chain), "%s-%s", chainPrefix, chainPostfix);
+
+    VIR_DEBUG("Defining chain '%s'", chain);
+
+    virFirewallAddCmd(fw, layer, "add", "chain", "bridge",
+                      nftablesTableName, chain, CHAINSETTINGS, NULL);
+
+    /* add VM interface jump */
+    fwrule = virFirewallAddCmd(fw, layer, "add", "rule", "bridge",
+                               nftablesTableName, rootChain, NULL);
+    /* -1 means "no protocol known for this chain": jump unconditionally */
+    if (protoidx != -1 && l3_protocols[protoidx].attr) {
+        virFirewallCmdAddArgList(fw, fwrule, "ether", "type", NULL);
+        virFirewallCmdAddArgFormat(fw, fwrule,
+                                   "0x%04x", l3_protocols[protoidx].attr);
+    }
+
+    virFirewallCmdAddArgList(fw, fwrule, "jump", chain, NULL);
+}
+
+/*
+ * nftablesCreateRootChainJump:
+ * @fw: the firewall ruleset instance
+ * @layer: the firewall layer
+ * @ifname: The name of the interface
+ * @ifMatch: The interface matcher; also used as the "vmap-<ifMatch>" name
+ * @topChain: The chain on which the temporary jump rule is added
+ * @rootChain: The per-interface root chain to jump to
+ * @addTmpJump: Whether to add the temporary "<ifMatch> <ifname> jump" rules
+ *
+ * In both tables: optionally add a temporary jump rule on @topChain, then
+ * delete any existing vmap element for @ifname (the delete is added with
+ * virFirewallAddCmdFull() and its third argument set to true, unlike the
+ * other commands) and add an element mapping @ifname to
+ * "jump <rootChain>".
+ */
+static void
+nftablesCreateRootChainJump(virFirewall *fw,
+                            virFirewallLayer layer,
+                            const char *ifname,
+                            const char *ifMatch,
+                            const char *topChain,
+                            const char *rootChain,
+                            bool addTmpJump)
+{
+    virFirewallCmd *fwrule = NULL;
+
+    if (addTmpJump) {
+        /* tmp iif oif jump */
+        virFirewallAddCmd(fw, layer, "add", "rule", "bridge", NF_OTHER_TABLE,
+                          topChain, ifMatch, ifname, "jump", rootChain, NULL);
+        virFirewallAddCmd(fw, layer, "add", "rule", "bridge", NF_ETHERNET_TABLE,
+                          topChain, ifMatch, ifname, "jump", rootChain, NULL);
+    }
+
+    /* remove VM interface jump */
+    fwrule = virFirewallAddCmdFull(fw, layer, true, NULL, NULL, "delete",
+                                   "element", "bridge", NF_OTHER_TABLE, NULL);
+    virFirewallCmdAddArgFormat(fw, fwrule, "vmap-%s", ifMatch);
+    virFirewallCmdAddArgList(fw, fwrule, "{", ifname, "}", NULL);
+    /* add VM interface jump */
+    fwrule = virFirewallAddCmd(fw, layer, "add", "element", "bridge",
+                               NF_OTHER_TABLE, NULL);
+    virFirewallCmdAddArgFormat(fw, fwrule, "vmap-%s", ifMatch);
+    virFirewallCmdAddArgList(fw, fwrule, "{", ifname, ":", "jump",
+                             rootChain, "}", NULL);
+
+    /* remove VM interface jump */
+    fwrule = virFirewallAddCmdFull(fw, layer, true, NULL, NULL, "delete",
+                                   "element", "bridge",
+                                   NF_ETHERNET_TABLE, NULL);
+    virFirewallCmdAddArgFormat(fw, fwrule, "vmap-%s", ifMatch);
+    virFirewallCmdAddArgList(fw, fwrule, "{", ifname, "}", NULL);
+    /* add VM interface jump */
+    fwrule = virFirewallAddCmd(fw, layer, "add", "element", "bridge",
+                               NF_ETHERNET_TABLE, NULL);
+    virFirewallCmdAddArgFormat(fw, fwrule, "vmap-%s", ifMatch);
+    virFirewallCmdAddArgList(fw, fwrule, "{", ifname, ":", "jump", rootChain,
+                             "}", NULL);
+}
+
+/*
+ * nftablesCreateRootChain:
+ * @fw: the firewall ruleset instance
+ * @layer: the firewall layer
+ * @rootChain: The name of the interface root chain to create
+ *
+ * Creates the interface root chain in both the ethernet and the other
+ * table, for example:
+ * - n-vnet1-in
+ *
+ * These root chains are the chains where all the subchains jumps get added to
+ * n-vnet1-in -> jump n-vnet1-mac-in; ether type ip jump n-vnet1-ipv4-in;
+ */
+static void
+nftablesCreateRootChain(virFirewall *fw,
+                        virFirewallLayer layer,
+                        const char *rootChain)
+{
+    VIR_DEBUG("Defining root chain '%s'", rootChain);
+
+    virFirewallAddCmd(fw, layer, "add", "chain", "bridge",
+                      NF_ETHERNET_TABLE, rootChain, CHAINSETTINGS, NULL);
+
+    virFirewallAddCmd(fw, layer, "add", "chain", "bridge",
+                      NF_OTHER_TABLE, rootChain, CHAINSETTINGS, NULL);
+}
+
+typedef struct _nftablesSubChain nftablesSubChain;
+struct _nftablesSubChain {
+    /* we use the lowest rule priority in a chain to compare root rule inserts
+     * see nftablesHandleCreateChains for the explanation */
+    virNWFilterRulePriority lowestRulePriority;
+    virNWFilterChainPriority priority;
+    enum l3_proto_idx protoidx;
+    /* "n-<ifname>-<suffix>" */
+    char prefix[MAX_NF_CHAINNAME_LENGTH];
+    /* borrowed from the rule instance that registered the chain */
+    const char *suffix;
+    bool hasEthernetRules;
+    bool hasOtherRules;
+};
+
+/* qsort comparator: root chain first, then ascending chain priority */
+static int nftablesChainCreateSort(const void *a, const void *b,
+                                   void *opaque G_GNUC_UNUSED)
+{
+    const nftablesSubChain *insta = *(const nftablesSubChain **)a;
+    const nftablesSubChain *instb = *(const nftablesSubChain **)b;
+    const char *root = virNWFilterChainSuffixTypeToString(
+        VIR_NWFILTER_CHAINSUFFIX_ROOT);
+    bool root_a = STREQ(insta->suffix, root);
+    bool root_b = STREQ(instb->suffix, root);
+
+    /* ensure root chain commands appear before all others since
+       we will need them to create the child chains */
+    if (root_a) {
+        if (!root_b)
+            return -1; /* a before b */
+    } else if (root_b) {
+        return 1; /* b before a */
+    }
+
+    /* priorities are limited to range [-1000, 1000] */
+    return insta->priority - instb->priority;
+}
+
+/*
+ * nftablesGetSubChains:
+ *
+ * Collect one nftablesSubChain per distinct non-root chain suffix found in
+ * @rules, recording whether the chain holds ethernet rules, other rules,
+ * or both. The entries are appended to @chains / @nchains and are owned by
+ * the caller.
+ */
+static void
+nftablesGetSubChains(nftablesSubChain ***chains,
+                     size_t *nchains,
+                     virNWFilterRuleInst **rules,
+                     size_t nrules,
+                     const char *ifname)
+{
+    size_t i, j;
+
+    for (i = 0; i < nrules; i++) {
+        g_autofree nftablesSubChain *chain = NULL;
+        nftablesSubChain **chainst = *chains;
+        bool registered = false;
+        bool isEthernetRule = virNWFilterRuleIsProtocolEthernet(
+            rules[i]->def);
+
+        for (j = 0; j < *nchains; j++) {
+            if (STREQ(rules[i]->chainSuffix, chainst[j]->suffix)) {
+                VIR_DEBUG("Chain already registered '%s'", chainst[j]->suffix);
+
+                /* using ifs here as they are more readable */
+                if (!chainst[j]->hasEthernetRules && isEthernetRule)
+                    chainst[j]->hasEthernetRules = true;
+                if (!chainst[j]->hasOtherRules && !isEthernetRule)
+                    chainst[j]->hasOtherRules = true;
+
+                registered = true;
+                break;
+            }
+        }
+
+        if (registered)
+            continue;
+
+        /* filter out the root chain */
+        if (STREQ(rules[i]->chainSuffix,
+                  virNWFilterChainSuffixTypeToString(VIR_NWFILTER_CHAINSUFFIX_ROOT)))
+            continue;
+
+        /* register the chain for creation */
+        chain = g_new0(nftablesSubChain, 1);
+
+        chain->hasEthernetRules = isEthernetRule;
+        chain->hasOtherRules = !chain->hasEthernetRules;
+        chain->priority = rules[i]->chainPriority;
+        chain->lowestRulePriority = rules[i]->priority;
+        chain->suffix = rules[i]->chainSuffix;
+        g_snprintf(chain->prefix, sizeof(chain->prefix),
+                   "n-%s-%s", ifname, chain->suffix);
+
+        VIR_APPEND_ELEMENT(*chains, *nchains, chain);
+    }
+}
+
+/*
+ * nftablesHandleCreateChains:
+ *
+ * Query callback that adds the commands creating the root chains, the sub
+ * chains with their jumps, all rules, and finally the root chain jumps for
+ * the interface described by @opaque (a chainCreateCallbackData).
+ *
+ * Returns 0 on success, -1 if a rule could not be instantiated.
+ */
+static int
+nftablesHandleCreateChains(virFirewall *fw,
+                           virFirewallLayer layer,
+                           const char *const *lines G_GNUC_UNUSED,
+                           void *opaque)
+{
+    size_t i, j, nchains = 0;
+    /* index of the first rule that has not been instantiated yet */
+    size_t nextRuleIndex = 0;
+    int ret = -1;
+    chainCreateCallbackData *cbdata = opaque;
+    nftablesSubChain **chains = NULL;
+    char rootChainIn[MAX_NF_CHAINNAME_LENGTH];
+    char rootChainOut[MAX_NF_CHAINNAME_LENGTH];
+    const char *rootChainName = virNWFilterChainSuffixTypeToString(
+        VIR_NWFILTER_CHAINSUFFIX_ROOT);
+    g_snprintf(rootChainIn, sizeof(rootChainIn), "n-%s-in", cbdata->ifname);
+    g_snprintf(rootChainOut, sizeof(rootChainOut), "n-%s-out", cbdata->ifname);
+
+    nftablesGetSubChains(&chains,
+                         &nchains,
+                         cbdata->rules,
+                         cbdata->nrules,
+                         cbdata->ifname);
+
+    /* sort chains on their chain priority */
+    g_qsort_with_data(chains, nchains, sizeof(chains[0]),
+                      nftablesChainCreateSort, NULL);
+
+    /* first we create the root interface in-out chains */
+    nftablesCreateRootChain(fw, layer, rootChainIn);
+    nftablesCreateRootChain(fw, layer, rootChainOut);
+
+    /* Note that filtering rules in the root chain are sorted with filters
+     * connected to the root chain following their priorities. This allows
+     * interleaving filtering rules with access to filter chains. (See also
+     * the nwfilter documentation section on Filtering chain priorities.)
+     *
+     * On the root chain, to maintain compatibility with tables created under
+     * the ebiptables driver, we need to process root rule commands before or
+     * after chain definitions and jumps based on rule priority. For example,
+     * if we have root chain rules with prio 100 and the ipv4 chain has rules
+     * between 200–300, the root chain rules must be placed BEFORE the ipv4
+     * root chain jump.
+     *
+     * This only applies to rules on the root chain, as all other chain rules
+     * are sorted correctly. Since chain definitions are processed before the
+     * rules, the ordering would otherwise be messed up. We also can't just
+     * create chains whenever a new one appears during rule processing, since
+     * chains have their own priority, which would disrupt both chain and jump
+     * priorities.
+     *
+     * To sum up: create the root chain, then create root rules and subchains
+     * in order based on chain priority. Root rules are created and inserted
+     * according to their own priority, while subchains follow based on their
+     * lowest rule priority. */
+
+    /* create chain if it doesn't exist */
+    /* define undefined sub chains */
+    for (i = 0; i < nchains; i++) {
+        enum l3_proto_idx protoidx;
+
+        /* emit the root chain rules whose priority is lower than this
+         * chain's lowest rule priority, so they precede the chain's jump */
+        for (j = nextRuleIndex; j < cbdata->nrules; j++) {
+            /* as root rules are inserted before all other rules,
+             * we stop walking the rules list when we've hit a no root rule */
+            if (STRNEQ(cbdata->rules[j]->chainSuffix, rootChainName))
+                break;
+
+            if (chains[i]->lowestRulePriority <= cbdata->rules[j]->priority)
+                break;
+
+            if (nftablesRuleInstCommand(fw, layer,
+                                        cbdata->ifname,
+                                        cbdata->rules[j]) < 0)
+                goto cleanup;
+        }
+        /* j is the first rule NOT emitted; resuming from the last emitted
+         * one would instantiate that rule a second time */
+        nextRuleIndex = j;
+
+        protoidx = nftablesGetProtoIdxByFiltername(chains[i]->suffix);
+        if (chains[i]->hasEthernetRules) {
+            nftablesCreateSubChain(fw, layer, NF_ETHERNET_TABLE,
+                                   chains[i]->prefix, protoidx,
+                                   rootChainIn, "in");
+            nftablesCreateSubChain(fw, layer, NF_ETHERNET_TABLE,
+                                   chains[i]->prefix, protoidx,
+                                   rootChainOut, "out");
+        }
+        if (chains[i]->hasOtherRules) {
+            nftablesCreateSubChain(fw, layer, NF_OTHER_TABLE,
+                                   chains[i]->prefix, protoidx,
+                                   rootChainIn, "in");
+            nftablesCreateSubChain(fw, layer, NF_OTHER_TABLE,
+                                   chains[i]->prefix, protoidx,
+                                   rootChainOut, "out");
+        }
+    }
+
+    /* process the remaining firewall rules */
+    /* everything before nextRuleIndex has been created */
+    for (i = nextRuleIndex; i < cbdata->nrules; i++) {
+        if (nftablesRuleInstCommand(fw, layer,
+                                    cbdata->ifname, cbdata->rules[i]) < 0)
+            goto cleanup;
+    }
+
+    /* creation of temp jumps is done as libvirt doesn't execute
+     * atomic nft changes (yet) */
+    nftablesCreateRootChainJump(fw, layer, cbdata->ifname, IN_IFMATCH,
+                                IN_CHAIN, rootChainIn, true);
+    nftablesCreateRootChainJump(fw, layer, cbdata->ifname, OUT_IFMATCH,
+                                OUT_CHAIN, rootChainOut, true);
+
+    ret = 0;
+
+ cleanup:
+    for (i = 0; i < nchains; i++)
+        g_free(chains[i]);
+    /* the array itself is heap allocated too */
+    g_free(chains);
+
+    return ret;
+}
+
+/**
+ * nftablesCreateRootTables
+ *
+ * @fw: the firewall instance
+ *
+ * Queue an "nft list tables" query whose output is handed to
+ * nftablesHandleCreateRootTables; see that handler for what is created.
+ */
+static void nftablesCreateRootTables(virFirewall *fw)
+{
+    virFirewallAddCmdFull(fw, VIR_FIREWALL_LAYER_ETHERNET,
+                          false, nftablesHandleCreateRootTables,
+                          NULL,
+                          "list", "tables", NULL);
+}
+
+/**
+ * nftablesCreateChains
+ *
+ * @fw: the firewall ruleset instance
+ * @cbdata: callback data struct which holds variables that
+ *          the callback handler needs in order to create
+ *          the chains and the dependent rules
+ *
+ * Queue an "nft list chains" query whose callback,
+ * nftablesHandleCreateChains, adds the chain and rule commands.
+ */
+static void nftablesCreateChains(virFirewall *fw,
+                                 chainCreateCallbackData *cbdata)
+{
+    virFirewallAddCmdFull(fw, VIR_FIREWALL_LAYER_ETHERNET,
+                          false, nftablesHandleCreateChains,
+                          (void *)cbdata,
+                          "list", "chains", NULL);
+}
+
+/*
+ * breakStrAt:
+ *
+ * Terminate @str at the first occurrence of @untilc and return @str.
+ * Note that this writes into @str even though it is declared const.
+ */
+static const char *breakStrAt(const char *str, char untilc)
+{
+    const char *untilPtr = strchr(str, untilc);
+    if (untilPtr) {
+        *(char *)untilPtr = '\0';
+    }
+
+    return str;
+}
+
+static int
+nftablesHandleRenameChains(virFirewall *fw,
+                           virFirewallLayer layer,
+                           const char *const *lines,
+                           void *opaque)
+{
+    size_t i = 0;
+    const char *ifname = opaque;
+    const char *tableName = NULL;
+    const char *chain = NULL;
+    const char *newName = NULL;
+    char chainCompare[MAX_NF_CHAINNAME_LENGTH];
+    g_snprintf(chainCompare, sizeof(chainCompare), "n-%s-", ifname);
+
+    /* parse nft tables list output to see if chains exist */
+    for (i = 0; lines[i] != NULL; i++) {
+        const char *line = lines[i];
+
+        /* first we'll have to parse the table name */
+        if (tableName == NULL && STRPREFIX(line, "table bridge ")) {
+            line = STRSKIP(line, "table bridge ");
+            /* parse table that we want to
clean */
+            tableName = breakStrAt(line, ' ');
+            continue;
+        }
+
+        virSkipSpaces(&line);
+
+        if ((line = STRSKIP(line, "chain ")) == NULL) {
+            continue;
+        }
+        chain = breakStrAt(line, ' ');
+
+        if (STRPREFIX(chain, chainCompare) && STRPREFIX(chain, "n-")) {
+            /* new name is name without n- at the prefix */
+            newName = chain + strlen("n-");
+            VIR_DEBUG("Scheduling chain rename '%s'->'%s' on table '%s'",
+                      chain, newName, tableName);
+            /* rename the chain */
+            virFirewallAddCmd(fw, layer,
+                              "rename", "chain", "bridge",
+                              tableName, chain, newName, NULL);
+        }
+    }
+
+    return 0;
+}
+
+/*
+ * nftablesHandleRemoveAll:
+ *
+ * Query callback for "nft -a list table bridge <table>". @opaque is the
+ * interface name (possibly carrying the "n-" prefix of not yet committed
+ * chains). Adds a delete command for
+ *  - every rule line that jumps from the interface to one of its chains
+ *    (deleted by handle), and
+ *  - every chain whose name starts with "<ifname>-".
+ *
+ * Always returns 0.
+ */
+static int
+nftablesHandleRemoveAll(virFirewall *fw,
+                        virFirewallLayer layer,
+                        const char *const *lines,
+                        void *opaque)
+{
+    size_t i = 0;
+    const char *ifname = opaque;
+    const char *tableName = NULL;
+    const char *chain = NULL;
+    char chainCompare[MAX_NF_CHAINNAME_LENGTH];
+    char fwCompare[MAX_NF_CHAINNAME_LENGTH];
+    char tmpFwCompare[MAX_NF_CHAINNAME_LENGTH];
+    g_snprintf(chainCompare, sizeof(chainCompare), "%s-", ifname);
+    g_snprintf(fwCompare, sizeof(fwCompare), "\"%s\" jump %s-", ifname, ifname);
+    /* match possible tmp jump on tmp name "\"vnet0\"" jump n-vnet0-" */
+    g_snprintf(tmpFwCompare, sizeof(tmpFwCompare), "\"%s\" jump n-%s-", ifname,
+               ifname);
+
+    /* parse nft tables list output to see if chains exist */
+    for (i = 0; lines[i] != NULL; i++) {
+        const char *line = lines[i];
+
+        /* first we'll have to parse the table name */
+        if (tableName == NULL && STRPREFIX(line, "table bridge ")) {
+            line = STRSKIP(line, "table bridge ");
+            /* parse table that we want to clean */
+            tableName = breakStrAt(line, ' ');
+            continue;
+        }
+
+        virSkipSpaces(&line);
+
+        /* delete tmp jumps */
+        if (strstr(line, fwCompare) != NULL ||
+            strstr(line, tmpFwCompare) != NULL) {
+            const char *handle = strstr(line, "# handle ");
+
+            /* A rule can only be deleted by handle and from a known chain.
+             * Skip lines lacking either: a NULL chain would terminate the
+             * NULL-terminated argument list below early, and a missing
+             * handle marker must not be dereferenced. */
+            if (handle == NULL || chain == NULL)
+                continue;
+            handle += strlen("# handle ");
+
+            /* delete jump */
+            virFirewallAddCmd(fw, layer,
+                              "delete", "rule", "bridge", tableName, chain,
+                              "handle", handle, NULL);
+
+            continue;
+        }
+
+        if ((line = STRSKIP(line, "chain ")) == NULL) {
+            continue;
+        }
+        /* remember the chain we are in for the rule lines that follow */
+        chain = breakStrAt(line, ' ');
+
+        if (STRPREFIX(chain, chainCompare)) {
+            VIR_DEBUG("Scheduling chain '%s' on table '%s' for deletion",
+                      chain, tableName);
+            /* delete the chain */
+            virFirewallAddCmd(fw, layer,
+                              "delete", "chain", "bridge",
+                              tableName, chain, NULL);
+        }
+    }
+
+    return 0;
+}
+
+/* Queue removal of all chains and jumps of @ifname in both tables */
+static void
+nftablesRemoveAllInterfaceChains(virFirewall *fw, const char *ifname)
+{
+    virFirewallAddCmdFull(fw, VIR_FIREWALL_LAYER_ETHERNET,
+                          false, nftablesHandleRemoveAll,
+                          (void *)ifname,
+                          "-a", "list", "table", "bridge",
+                          NF_ETHERNET_TABLE, NULL);
+
+    virFirewallAddCmdFull(fw, VIR_FIREWALL_LAYER_ETHERNET,
+                          false, nftablesHandleRemoveAll,
+                          (void *)ifname,
+                          "-a", "list", "table", "bridge",
+                          NF_OTHER_TABLE, NULL);
+}
+
+/* Queue the rename of the "n-" prefixed chains of @ifname in both tables */
+static void
+nftablesRenameAllInterfaceChains(virFirewall *fw, const char *ifname)
+{
+    virFirewallAddCmdFull(fw, VIR_FIREWALL_LAYER_ETHERNET,
+                          false, nftablesHandleRenameChains,
+                          (void *)ifname,
+                          "-a", "list", "table", "bridge",
+                          NF_ETHERNET_TABLE, NULL);
+
+    virFirewallAddCmdFull(fw, VIR_FIREWALL_LAYER_ETHERNET,
+                          false, nftablesHandleRenameChains,
+                          (void *)ifname,
+                          "-a", "list", "table", "bridge",
+                          NF_OTHER_TABLE, NULL);
+}
+
+static int
+nftablesApplyNewRules(const char *ifname,
+                      virNWFilterRuleInst **rules,
+                      size_t nrules)
+{
+    size_t i;
+    g_autoptr(virFirewall) fw = virFirewallNew(VIR_FIREWALL_BACKEND_NFTABLES);
+    chainCreateCallbackData chainCallbackData = {ifname, nrules, rules};
+
+    /* nwfilter_nftables applies new rules first, then remove old rules
+     * in order to do this we:
+     * - place the new chains under a name prefixed with "n-"
+     * - create tmp jump that catches vmap switch moment,
+     *   traffic will temporarily not be matched as an entry from the vmap will
+     *   be deleted and then recreated as you can't atomic update vmaps
via a
+     *   single command
+     * - in the tearOldRules function, we also remove the tmp interface jump to
+     *   the new chains
+     * - in tearOldRules we remove the old chains
+     * - in tearOldRules we rename the "n-" chains by removing "n-" from the
+     *   chain name
+     *
+     * This allows us in a rollback scenario to simply remove the new chains
+     * and jumps
+     */
+    /* heap allocated: the name must stay valid until virFirewallApply()
+     * has run the rollback callback, and its length follows ifname */
+    g_autofree char *tmpIfname = g_strdup_printf("n-%s", ifname);
+
+    /* walk the list of rules and increase the priority
+     * of rules in case the chain priority is of higher value;
+     * this preserves the order of the rules and ensures that
+     * the chain will be created before the chain's rules
+     * are created; don't adjust rules in the root chain
+     * example: a rule of priority -510 will be adjusted to
+     * priority -500 and the chain with priority -500 will
+     * then be created before it.
+     */
+    for (i = 0; i < nrules; i++) {
+        if (rules[i]->chainPriority > rules[i]->priority &&
+            STRNEQ(rules[i]->chainSuffix,
+                   virNWFilterChainSuffixTypeToString(
+                       VIR_NWFILTER_CHAINSUFFIX_ROOT))) {
+
+            rules[i]->priority = rules[i]->chainPriority;
+        }
+    }
+
+    /* sort rules */
+    if (nrules) {
+        g_qsort_with_data(rules, nrules, sizeof(rules[0]),
+                          virNWFilterRuleInstSortPtr, NULL);
+    }
+
+    virFirewallStartTransaction(fw, 0);
+
+    /* create root tables if they don't exist already */
+    nftablesCreateRootTables(fw);
+    /* create user chains and rules */
+    nftablesCreateChains(fw, &chainCallbackData);
+
+    /* rollback commands, if necessary */
+    virFirewallStartRollback(fw, 0);
+    nftablesRemoveAllInterfaceChains(fw, tmpIfname);
+
+    /* process rules and apply them */
+    return virFirewallApply(fw);
+}
+
+/*
+ * nftablesTeardownNewRules:
+ * @ifname: the name of the interface
+ *
+ * Remove the not yet committed ("n-" prefixed) chains and jumps of
+ * @ifname.
+ *
+ * Returns the result of virFirewallApply().
+ */
+static int
+nftablesTeardownNewRules(const char *ifname)
+{
+    g_autofree char *matchIfname = g_strdup_printf("n-%s", ifname);
+    g_autoptr(virFirewall) fw = virFirewallNew(VIR_FIREWALL_BACKEND_NFTABLES);
+
+    virFirewallStartTransaction(fw, 0);
+
+    /* remove tmp interface chains and rules */
+    nftablesRemoveAllInterfaceChains(fw, matchIfname);
+
+    return virFirewallApply(fw);
+}
+
+/*
+ * nftablesTeardownOldRules:
+ * @ifname: the name of the interface
+ *
+ * Remove the current chains of @ifname and rename the "n-" prefixed
+ * chains into their place.
+ *
+ * Returns the result of virFirewallApply().
+ */
+static int
+nftablesTeardownOldRules(const char *ifname)
+{
+    g_autoptr(virFirewall) fw = virFirewallNew(VIR_FIREWALL_BACKEND_NFTABLES);
+    virFirewallStartTransaction(fw, 0);
+
+    /* remove old interface chains and rules */
+    nftablesRemoveAllInterfaceChains(fw, ifname);
+
+    /* rename new temp interface chains and rules */
+    nftablesRenameAllInterfaceChains(fw, ifname);
+
+    return virFirewallApply(fw);
+}
+
+/**
+ * nftablesAllTeardown:
+ * @ifname : the name of the interface to which the rules apply
+ *
+ * Unconditionally remove all possible user defined chains and rules
+ * that were created for the given interface (ifname).
+ *
+ * Returns the result of virFirewallApply().
+ */
+static int
+nftablesAllTeardown(const char *ifname)
+{
+    g_autoptr(virFirewall) fw = virFirewallNew(VIR_FIREWALL_BACKEND_NFTABLES);
+    virFirewallStartTransaction(fw, 0);
+
+    /* remove interface chains and rules */
+    nftablesRemoveAllInterfaceChains(fw, ifname);
+
+    return virFirewallApply(fw);
+}
+
+/**
+ * nftablesCanApplyBasicRules
+ *
+ * Determine whether this driver can apply the basic rules, meaning
+ * run nftablesApplyBasicRules and nftablesApplyDHCPOnlyRules.
+ * In case of this driver we need the nft tool available.
+ *
+ * NOTE(review): the function returns true unconditionally and does not
+ * check for the nft tool; confirm that the check happens elsewhere.
+ */
+static bool nftablesCanApplyBasicRules(void)
+{
+    return true;
+}
+
+/**
+ * nftablesApplyBasicRules
+ *
+ * @ifname: name of the backend-interface to which to apply the rules
+ * @macaddr: MAC address the VM is using in packets sent through the
+ *           interface
+ *
+ * Returns 0 on success, -1 on failure with the rules removed
+ *
+ * Apply basic filtering rules on the given interface
+ * - filtering for MAC address spoofing
+ * - allowing IPv4 & ARP traffic
+ */
+static int
+nftablesApplyBasicRules(const char *ifname,
+                        const virMacAddr *macaddr)
+{
+    g_autoptr(virFirewall) fw = virFirewallNew(VIR_FIREWALL_BACKEND_NFTABLES);
+    char macaddr_str[VIR_MAC_STRING_BUFLEN];
+    char rootChainIn[MAX_NF_CHAINNAME_LENGTH];
+    char rootChainOut[MAX_NF_CHAINNAME_LENGTH];
+
+    virMacAddrFormat(macaddr, macaddr_str);
+
+    if (nftablesAllTeardown(ifname) < 0)
+        return -1;
+
+    virFirewallStartTransaction(fw, 0);
+
+    /* create root tables if they don't exist already */
+    nftablesCreateRootTables(fw);
+
+    /* create root chain */
+    g_snprintf(rootChainIn, sizeof(rootChainIn), "%s-in", ifname);
+    g_snprintf(rootChainOut, sizeof(rootChainOut), "%s-out", ifname);
+    nftablesCreateRootChain(fw, VIR_FIREWALL_LAYER_ETHERNET, rootChainIn);
+    nftablesCreateRootChain(fw, VIR_FIREWALL_LAYER_ETHERNET, rootChainOut);
+
+
+    /* apply rules to root chain */
+    virFirewallAddCmd(fw, VIR_FIREWALL_LAYER_ETHERNET, "add", "rule", "bridge",
+                      NF_ETHERNET_TABLE, rootChainOut, "ether", "saddr",
+                      "!=", macaddr_str, "drop", NULL);
+    virFirewallAddCmd(fw, VIR_FIREWALL_LAYER_ETHERNET, "add", "rule", "bridge",
+                      NF_ETHERNET_TABLE, rootChainOut, "ether", "type", "ip",
+                      "accept", NULL);
+    virFirewallAddCmd(fw, VIR_FIREWALL_LAYER_ETHERNET, "add", "rule", "bridge",
+                      NF_ETHERNET_TABLE, rootChainOut, "ether", "type", "arp",
+                      "accept", NULL);
+    virFirewallAddCmd(fw, VIR_FIREWALL_LAYER_ETHERNET, "add", "rule", "bridge",
+                      NF_ETHERNET_TABLE, rootChainOut, "accept", NULL);
+
+    nftablesCreateRootChainJump(fw,
VIR_FIREWALL_LAYER_ETHERNET, ifname, + IN_IFMATCH, IN_CHAIN, rootChainIn, false); + nftablesCreateRootChainJump(fw, VIR_FIREWALL_LAYER_ETHERNET, ifname, + OUT_IFMATCH, OUT_CHAIN, rootChainOut, false); + + if (virFirewallApply(fw) < 0) { + nftablesAllTeardown(ifname); + return -1; + } + + return 0; +} + +/** + * nftablesApplyDHCPOnlyRules + * + * @ifname: name of the backend-interface to which to apply the rules + * @macaddr: MAC address the VM is using in packets sent through the + * interface + * @dhcpsrvrs: The DHCP server(s) from which the VM may receive traffic + * from; may be NULL + * @leaveTemporary: Whether to leave the table names with their temporary + * names (true) or also perform the renaming to their final names as + * part of this call (false) + * + * Returns 0 on success, -1 on failure with the rules removed + * + * Apply filtering rules so that the VM can only send and receive + * DHCP traffic and nothing else. + */ +static int +nftablesApplyDHCPOnlyRules(const char *ifname, + const virMacAddr *macaddr, + virNWFilterVarValue *dhcpsrvrs, + bool leaveTemporary G_GNUC_UNUSED) +{ + char rootChainIn [MAX_NF_CHAINNAME_LENGTH], + rootChainOut[MAX_NF_CHAINNAME_LENGTH]; + char macaddr_str[VIR_MAC_STRING_BUFLEN]; + unsigned int idx = 0; + unsigned int num_dhcpsrvrs; + g_autoptr(virFirewall) fw = virFirewallNew(VIR_FIREWALL_BACKEND_NFTABLES); + + virMacAddrFormat(macaddr, macaddr_str); + + if (nftablesAllTeardown(ifname) < 0) + return -1; + + virFirewallStartTransaction(fw, 0); + + /* create root tables if they don't exist already */ + nftablesCreateRootTables(fw); + + /* create root chain */ + g_snprintf(rootChainIn, sizeof(rootChainIn), "%s-in", ifname); + g_snprintf(rootChainOut, sizeof(rootChainOut), "%s-out", ifname); + nftablesCreateRootChain(fw, VIR_FIREWALL_LAYER_ETHERNET, rootChainIn); + nftablesCreateRootChain(fw, VIR_FIREWALL_LAYER_ETHERNET, rootChainOut); + + virFirewallAddCmd(fw, VIR_FIREWALL_LAYER_ETHERNET, "add", "rule", "bridge", + 
NF_ETHERNET_TABLE, rootChainOut, "ether", "saddr", + macaddr_str, "ether", "type", "ip", + "udp", "sport", "68", "udp", "dport", "67", "accept", NULL); + + virFirewallAddCmd(fw, VIR_FIREWALL_LAYER_ETHERNET, "add", "rule", "bridge", + NF_ETHERNET_TABLE, rootChainOut, "drop", NULL); + + num_dhcpsrvrs = (dhcpsrvrs != NULL) + ? virNWFilterVarValueGetCardinality(dhcpsrvrs) + : 0; + + while (true) { + const char *dhcpserver = NULL; + int ctr; + + if (idx < num_dhcpsrvrs) + dhcpserver = virNWFilterVarValueGetNthValue(dhcpsrvrs, idx); + + /* + * create two rules allowing response to MAC address of VM + * or to broadcast MAC address + */ + for (ctr = 0; ctr < 2; ctr++) { + if (dhcpserver) + virFirewallAddCmd(fw, VIR_FIREWALL_LAYER_ETHERNET, + "add", "rule", "bridge", + NF_ETHERNET_TABLE, rootChainIn, "ether", + "daddr", + (ctr == 0) ? macaddr_str : "ff:ff:ff:ff:ff:ff", + "ether", "type", "ip", + "ip", "saddr", dhcpserver, + "udp", "sport", "67", + "udp", "dport", "68", "accept", NULL); + else + virFirewallAddCmd(fw, VIR_FIREWALL_LAYER_ETHERNET, + "add", "rule", "bridge", + NF_ETHERNET_TABLE, rootChainIn, "ether", + "daddr", + (ctr == 0) ? 
macaddr_str : "ff:ff:ff:ff:ff:ff", + "ether", "type", "ip", + "udp", "sport", "67", + "udp", "dport", "68", "accept", NULL); + } + + idx++; + + if (idx >= num_dhcpsrvrs) + break; + } + + virFirewallAddCmd(fw, VIR_FIREWALL_LAYER_ETHERNET, "add", "rule", "bridge", + NF_ETHERNET_TABLE, rootChainIn, "drop", NULL); + + nftablesCreateRootChainJump(fw, VIR_FIREWALL_LAYER_ETHERNET, ifname, + IN_IFMATCH, IN_CHAIN, rootChainIn, false); + nftablesCreateRootChainJump(fw, VIR_FIREWALL_LAYER_ETHERNET, ifname, + OUT_IFMATCH, OUT_CHAIN, rootChainOut, false); + + if (virFirewallApply(fw) < 0) { + nftablesAllTeardown(ifname); + return -1; + } + + return 0; +} + +static int +nftablesRemoveBasicRules(const char *ifname) +{ + return nftablesAllTeardown(ifname); +} + +/** + * nftablesApplyDropAllRules + * + * @ifname: name of the backend-interface to which to apply the rules + * + * Returns 0 on success, -1 on failure with the rules removed + * + * Apply filtering rules so that the VM cannot receive or send traffic. 
+ */ +static int +nftablesDropAllRules(const char *ifname) +{ + char rootChainIn [MAX_NF_CHAINNAME_LENGTH], + rootChainOut[MAX_NF_CHAINNAME_LENGTH]; + g_autoptr(virFirewall) fw = virFirewallNew(VIR_FIREWALL_BACKEND_NFTABLES); + + if (nftablesAllTeardown(ifname) < 0) + return -1; + + virFirewallStartTransaction(fw, 0); + + /* create root tables if they don't exist already */ + nftablesCreateRootTables(fw); + + /* create root chain */ + g_snprintf(rootChainIn, sizeof(rootChainIn), "%s-in", ifname); + g_snprintf(rootChainOut, sizeof(rootChainOut), "%s-out", ifname); + nftablesCreateRootChain(fw, VIR_FIREWALL_LAYER_ETHERNET, rootChainIn); + nftablesCreateRootChain(fw, VIR_FIREWALL_LAYER_ETHERNET, rootChainOut); + + virFirewallAddCmd(fw, VIR_FIREWALL_LAYER_ETHERNET, "add", "rule", "bridge", + NF_ETHERNET_TABLE, rootChainOut, "drop", NULL); + virFirewallAddCmd(fw, VIR_FIREWALL_LAYER_ETHERNET, "add", "rule", "bridge", + NF_ETHERNET_TABLE, rootChainIn, "drop", NULL); + + nftablesCreateRootChainJump(fw, VIR_FIREWALL_LAYER_ETHERNET, ifname, + IN_IFMATCH, IN_CHAIN, rootChainIn, false); + nftablesCreateRootChainJump(fw, VIR_FIREWALL_LAYER_ETHERNET, ifname, + OUT_IFMATCH, OUT_CHAIN, rootChainOut, false); + + if (virFirewallApply(fw) < 0) { + nftablesAllTeardown(ifname); + return -1; + } + + return 0; +} + +static int +nftablesDriverInit(bool privileged) +{ + if (!privileged) + return 0; + + nftables_driver.flags = TECHDRV_FLAG_INITIALIZED; + + return 0; +} + +static void +nftablesDriverShutdown(void) +{ + nftables_driver.flags = 0; +} + +virNWFilterTechDriver nftables_driver = { + .name = NFTABLES_DRIVER_ID, + .flags = 0, + + .init = nftablesDriverInit, + .shutdown = nftablesDriverShutdown, + + .applyNewRules = nftablesApplyNewRules, + .tearNewRules = nftablesTeardownNewRules, + .tearOldRules = nftablesTeardownOldRules, + .allTeardown = nftablesAllTeardown, + + .canApplyBasicRules = nftablesCanApplyBasicRules, + .applyBasicRules = nftablesApplyBasicRules, + .applyDHCPOnlyRules 
= nftablesApplyDHCPOnlyRules, + .applyDropAllRules = nftablesDropAllRules, + .removeBasicRules = nftablesRemoveBasicRules, +}; diff --git a/src/nwfilter/nwfilter_nftables_driver.h b/src/nwfilter/nwfilter_nftables_driver.h new file mode 100644 index 0000000000..a767413208 --- /dev/null +++ b/src/nwfilter/nwfilter_nftables_driver.h @@ -0,0 +1,28 @@ +/* + * nwfilter_nftables_driver.h: nftables driver support + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library. If not, see + * <http://www.gnu.org/licenses/>. + */ + +#pragma once + +#include "nwfilter_tech_driver.h" + +extern virNWFilterTechDriver nftables_driver; + +#define NFTABLES_DRIVER_ID "nftables" + +/* see source/include/uapi/linux/netfilter/nf_tables.h */ +#define MAX_NF_CHAINNAME_LENGTH 256 -- 2.43.0