[libvirt] [V2 PATCH] V2 port-profile ID support using new IFLA_VF_PORTS netlink msg

From: Scott Feldman <scofeldm@cisco.com> Use the new IFLA_VF_PORTS netlink msg to associate/disassociate port-profiles on a virtual port backing the VM device. The new netlink msg type has been accepted by netdev kernel maintainer. Tested with Cisco's 10G Ethernet NIC using example XML: <interface type='direct'> <mac address='52:54:00:07:70:cf'/> <source dev='eth2' mode='private' profileid='test-network'/> <target dev='macvtap0'/> <model type='virtio'/> </interface> Changes since V1: - port to new netlink msg types Next steps for V3, etc: 1) merge with Stefan's latest patch, as much as possible 2) assign VM device mac addr to eth and set macvtap mac addr to random 3) figure out where host_uuid lives Signed-off-by: Scott Feldman <scofeldm@cisco.com> Signed-off-by: Roopa Prabhu<roprabhu@cisco.com> --- src/conf/domain_conf.c | 13 ++ src/conf/domain_conf.h | 1 src/libvirt_macvtap.syms | 2 src/qemu/qemu_conf.c | 7 + src/qemu/qemu_driver.c | 10 +- src/util/macvtap.c | 260 +++++++++++++++++++++++++++++++++++++++++++++- src/util/macvtap.h | 7 + 7 files changed, 294 insertions(+), 6 deletions(-) diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c index 20c9c51..577c2ea 100644 --- a/src/conf/domain_conf.c +++ b/src/conf/domain_conf.c @@ -484,6 +484,7 @@ void virDomainNetDefFree(virDomainNetDefPtr def) case VIR_DOMAIN_NET_TYPE_DIRECT: VIR_FREE(def->data.direct.linkdev); + VIR_FREE(def->data.direct.profileid); break; case VIR_DOMAIN_NET_TYPE_USER: @@ -1831,6 +1832,7 @@ virDomainNetDefParseXML(virCapsPtr caps, char *internal = NULL; char *devaddr = NULL; char *mode = NULL; + char *profileid = NULL; virNWFilterHashTablePtr filterparams = NULL; if (VIR_ALLOC(def) < 0) { @@ -1873,6 +1875,7 @@ virDomainNetDefParseXML(virCapsPtr caps, xmlStrEqual(cur->name, BAD_CAST "source")) { dev = virXMLPropString(cur, "dev"); mode = virXMLPropString(cur, "mode"); + profileid = virXMLPropString(cur, "profileid"); } else if ((network == NULL) && ((def->type == VIR_DOMAIN_NET_TYPE_SERVER) || (def->type == VIR_DOMAIN_NET_TYPE_CLIENT) || @@ -2050,6 +2053,11 @@ virDomainNetDefParseXML(virCapsPtr caps, } else def->data.direct.mode = VIR_DOMAIN_NETDEV_MACVTAP_MODE_VEPA; + if (profileid != NULL) { + def->data.direct.profileid = profileid; + profileid = NULL; + } + def->data.direct.linkdev = dev; dev = NULL; @@ -2115,6 +2123,7 @@ cleanup: VIR_FREE(internal); VIR_FREE(devaddr); VIR_FREE(mode); + VIR_FREE(profileid); virNWFilterHashTableFree(filterparams); return def; @@ -5141,6 +5150,10 @@ virDomainNetDefFormat(virBufferPtr buf, def->data.direct.linkdev); virBufferVSprintf(buf, " mode='%s'", virDomainNetdevMacvtapTypeToString(def->data.direct.mode)); + if (def->data.direct.profileid) { + virBufferEscapeString(buf, " profileid='%s'", + def->data.direct.profileid); + } virBufferAddLit(buf, "/>\n"); break; diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h index fadc8bd..30ebf07 100644 --- a/src/conf/domain_conf.h +++ b/src/conf/domain_conf.h @@ -290,6 +290,7 @@ struct _virDomainNetDef { struct { char *linkdev; int mode; + char *profileid; } direct; } data; char *ifname; diff --git a/src/libvirt_macvtap.syms b/src/libvirt_macvtap.syms index ae229a0..9d4652e 100644 --- a/src/libvirt_macvtap.syms +++ b/src/libvirt_macvtap.syms @@ -3,3 +3,5 @@ # macvtap.h openMacvtapTap; delMacvtap; +setPortProfileId; +unsetPortProfileId; diff --git a/src/qemu/qemu_conf.c b/src/qemu/qemu_conf.c index 3e334dc..ea74f1c 100644 --- a/src/qemu/qemu_conf.c +++ b/src/qemu/qemu_conf.c @@ -1479,6 +1479,11 @@ qemudPhysIfaceConnect(virConnectPtr conn, net->model && STREQ(net->model, "virtio")) vnet_hdr = 1; + if (net->data.direct.profileid) + setPortProfileId(net->data.direct.linkdev, + net->data.direct.profileid, + NULL, NULL); + rc = openMacvtapTap(net->ifname, net->mac, linkdev, brmode, &res_ifname, vnet_hdr); if (rc >= 0) { @@ -1501,6 +1506,8 @@ qemudPhysIfaceConnect(virConnectPtr conn, close(rc); rc = -1; delMacvtap(net->ifname); + if (net->data.direct.profileid) + unsetPortProfileId(net->data.direct.linkdev); } } } diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index 65ca117..c5f2f78 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -3695,8 +3695,11 @@ static void qemudShutdownVMDaemon(struct qemud_driver *driver, for (i = 0; i < def->nnets; i++) { virDomainNetDefPtr net = def->nets[i]; if (net->type == VIR_DOMAIN_NET_TYPE_DIRECT) { - if (net->ifname) + if (net->ifname) { delMacvtap(net->ifname); + if (net->data.direct.profileid) + unsetPortProfileId(net->data.direct.linkdev); + } } } #endif @@ -8387,8 +8390,11 @@ qemudDomainDetachNetDevice(struct qemud_driver *driver, #if WITH_MACVTAP if (detach->type == VIR_DOMAIN_NET_TYPE_DIRECT) { - if (detach->ifname) + if (detach->ifname) { delMacvtap(detach->ifname); + if (detach->data.direct.profileid) + unsetPortProfileId(detach->data.direct.linkdev); + } } #endif diff --git a/src/util/macvtap.c b/src/util/macvtap.c index 1f8dd29..7803bc8 100644 --- a/src/util/macvtap.c +++ b/src/util/macvtap.c @@ -85,14 +85,14 @@ static void nlClose(int fd) * buffer will be returned. */ static -int nlComm(struct nlmsghdr *nlmsg, +int nlComm(struct nlmsghdr *nlmsg, int nlgroups, char **respbuf, int *respbuflen) { int rc = 0; struct sockaddr_nl nladdr = { .nl_family = AF_NETLINK, .nl_pid = 0, - .nl_groups = 0, + .nl_groups = nlgroups, }; int rcvChunkSize = 1024; // expecting less than that int rcvoffset = 0; @@ -192,6 +192,27 @@ nlAppend(struct nlmsghdr *nlm, int totlen, const void *data, int datalen) return pos; } +#define NLMSG_TAIL(nmsg) \ + ((struct rtattr *) (((char *) (nmsg)) + NLMSG_ALIGN((nmsg)->nlmsg_len))) + +static struct rtattr *nlNest(struct nlmsghdr *nlm, int totlen, int type) +{ + struct rtattr *nest = NLMSG_TAIL(nlm); + + if (nlm->nlmsg_len + NLMSG_ALIGN(sizeof(*nest)) > totlen) + return NULL; + nest->rta_type = type; + nest->rta_len = RTA_LENGTH(0); + nlm->nlmsg_len += sizeof(*nest); + nlAlign(nlm); + return nest; +} + +static int nlNestEnd(struct nlmsghdr *nlm, struct rtattr *nest) +{ + nest->rta_len = (char *)NLMSG_TAIL(nlm) - (char *)nest; + return nlm->nlmsg_len; +} static int link_add(const char *type, @@ -287,7 +308,7 @@ link_add(const char *type, li->rta_len = (char *)nlm + nlm->nlmsg_len - (char *)li; - if (nlComm(nlm, &recvbuf, &recvbuflen) < 0) + if (nlComm(nlm, 0, &recvbuf, &recvbuflen) < 0) return -1; if (recvbuflen < NLMSG_LENGTH(0) || recvbuf == NULL) @@ -371,7 +392,7 @@ link_del(const char *name) if (!nlAppend(nlm, sizeof(nlmsgbuf), rtattbuf, rta->rta_len)) goto buffer_too_small; - if (nlComm(nlm, &recvbuf, &recvbuflen) < 0) + if (nlComm(nlm, 0, &recvbuf, &recvbuflen) < 0) return -1; if (recvbuflen < NLMSG_LENGTH(0) || recvbuf == NULL) @@ -568,6 +589,237 @@ configMacvtapTap(int tapfd, int vnet_hdr) return 0; } +#if 0 +static int +get_host_uuid(char *host_uuid, int len) +{ + const char *dmidecodearg[] = { "dmidecode", "-s", "system-uuid", NULL }; + const char *const dmidecodeenv[] = { "LC_ALL=C", NULL }; + char *binary, *newline; + int dmidecodestdout = -1; + int ret = -1; + pid_t child; + + binary = virFindFileInPath(dmidecodearg[0]); + if (binary == NULL || access(binary, X_OK) != 0) { + VIR_FREE(binary); + return -1; + } + dmidecodearg[0] = binary; + + if (virExec(dmidecodearg, dmidecodeenv, NULL, + &child, -1, &dmidecodestdout, NULL, VIR_EXEC_CLEAR_CAPS) < 0) { + ret = -1; + goto cleanup; + } + + if((ret = saferead(dmidecodestdout, host_uuid, len)) <= 0) { + ret = -1; + goto cleanup; + } + host_uuid[ret-1] = '\0'; + + /* strip newline */ + newline = strrchr(host_uuid, '\n'); + if (newline) + *newline = '\0'; + + ret = 0; + +cleanup: + VIR_FREE(binary); + + if (close(dmidecodestdout) < 0) + ret = -1; + + return ret; +} +#endif + +static int +portProfileIdMcast(const char *linkdev, + const char request, + const char *profileid, + const unsigned short *instance_uuid, + const unsigned short *host_uuid) +{ + struct rtattr *port_self; + char nlmsgbuf[512]; + struct nlmsghdr *nlm = (struct nlmsghdr *)nlmsgbuf, *resp; + struct ifinfomsg infomsg = { .ifi_family = AF_UNSPEC }; + char rtattbuf[256]; + struct rtattr *rta; + char *recvbuf = NULL; + int recvbuflen; + struct nlmsgerr *err; + int rc = 0; + + memset(&nlmsgbuf, 0, sizeof(nlmsgbuf)); + nlInit(nlm, NLM_F_REQUEST, RTM_SETLINK); + + if (!nlAppend(nlm, sizeof(nlmsgbuf), &infomsg, sizeof(infomsg))) + goto buffer_too_small; + + rta = rtattrCreate(rtattbuf, sizeof(rtattbuf), IFLA_IFNAME, + linkdev, strlen(linkdev) + 1); + if (!rta) + goto buffer_too_small; + + if (!nlAppend(nlm, sizeof(nlmsgbuf), rtattbuf, rta->rta_len)) + goto buffer_too_small; + + port_self = nlNest(nlm, sizeof(nlmsgbuf), IFLA_PORT_SELF); + if (!port_self) + return -1; + + switch (request) { + case PORT_REQUEST_ASSOCIATE: + if (profileid && strlen(profileid)) { + rta = rtattrCreate(rtattbuf, sizeof(rtattbuf), + IFLA_PORT_PROFILE, + profileid, strlen(profileid) + 1); + if (!rta) + goto buffer_too_small; + + if (!nlAppend(nlm, sizeof(nlmsgbuf), rtattbuf, rta->rta_len)) + goto buffer_too_small; + } + if (instance_uuid) { + rta = rtattrCreate(rtattbuf, sizeof(rtattbuf), + IFLA_PORT_INSTANCE_UUID, + instance_uuid, PORT_UUID_MAX); + if (!rta) + goto buffer_too_small; + + if (!nlAppend(nlm, sizeof(nlmsgbuf), rtattbuf, rta->rta_len)) + goto buffer_too_small; + } + if (host_uuid) { + rta = rtattrCreate(rtattbuf, sizeof(rtattbuf), + IFLA_PORT_HOST_UUID, + host_uuid, PORT_UUID_MAX); + if (!rta) + goto buffer_too_small; + + if (!nlAppend(nlm, sizeof(nlmsgbuf), rtattbuf, rta->rta_len)) + goto buffer_too_small; + } + break; + case PORT_REQUEST_DISASSOCIATE: + break; + } + + rta = rtattrCreate(rtattbuf, sizeof(rtattbuf), IFLA_PORT_REQUEST, + &request, 1); + if (!rta) + goto buffer_too_small; + + if (!nlAppend(nlm, sizeof(nlmsgbuf), rtattbuf, rta->rta_len)) + goto buffer_too_small; + + nlNestEnd(nlm, port_self); + + if (nlComm(nlm, RTNLGRP_LINK, &recvbuf, &recvbuflen) < 0) + return -1; + + if (recvbuflen < NLMSG_LENGTH(0) || recvbuf == NULL) + goto malformed_resp; + + resp = (struct nlmsghdr *)recvbuf; + + switch (resp->nlmsg_type) { + case NLMSG_ERROR: + err = (struct nlmsgerr *)NLMSG_DATA(resp); + if (resp->nlmsg_len < NLMSG_LENGTH(sizeof(*err))) + goto malformed_resp; + + switch (-err->error) { + case 0: + break; + + default: + virReportSystemError(-err->error, + _("error setting port profile on %s"), + linkdev); + rc = -1; + } + break; + case NLMSG_DONE: + break; + + default: + goto malformed_resp; + } + + VIR_FREE(recvbuf); + + return rc; + +malformed_resp: + macvtapError(VIR_ERR_INTERNAL_ERROR, "%s", + _("malformed netlink response message")); + VIR_FREE(recvbuf); + return -1; + +buffer_too_small: + macvtapError(VIR_ERR_INTERNAL_ERROR, "%s", + _("internal buffer is too small")); + + return -1; +} + +int +setPortProfileId(const char *linkdev, + char *profileid, + unsigned short *instance_uuid, + unsigned short *host_uuid) +{ + int rc; + + rc = ifaceDown(linkdev); + if (rc != 0) { + virReportSystemError(errno, + _("cannot 'down' interface %s"), + linkdev); + return -1; + } + + rc = portProfileIdMcast(linkdev, PORT_REQUEST_ASSOCIATE, + profileid, instance_uuid, host_uuid); + if (rc) + return rc; + + rc = ifaceUp(linkdev); + if (rc != 0) { + virReportSystemError(errno, + _("cannot 'up' interface %s"), + linkdev); + return -1; + } + + return rc; +} + +int +unsetPortProfileId(const char *linkdev) +{ + int rc; + + rc = portProfileIdMcast(linkdev, PORT_REQUEST_DISASSOCIATE, + NULL, NULL, NULL); + if (rc) + return rc; + + rc = ifaceDown(linkdev); + if (rc != 0) { + virReportSystemError(errno, + _("cannot 'down' interface %s"), + linkdev); + return -1; + } + + return rc; +} /** * openMacvtapTap: diff --git a/src/util/macvtap.h b/src/util/macvtap.h index 5d4ea5e..136779e 100644 --- a/src/util/macvtap.h +++ b/src/util/macvtap.h @@ -37,6 +37,13 @@ int openMacvtapTap(const char *ifname, void delMacvtap(const char *ifname); +int setPortProfileId(const char *linkdev, + char *profile_id, + unsigned short *instance_uuid, + unsigned short *host_uuid); + +int unsetPortProfileId(const char *linkdev); + # endif /* WITH_MACVTAP */ # define MACVTAP_MODE_PRIVATE_STR "private"

Scott Feldman <scofeldm@cisco.com> wrote on 05/20/2010 03:22:12 PM:
Next steps for V3, etc:
1) merge with Stefan's latest patch, as much as possible
Yes. Try to make it a patch on top of my V5 patch...
2) assign VM device mac addr to eth and set macvtap mac addr to random
Hm, why is this necessary? I have tested the macvtap device and always set it to the address that libvirt assigned to it and that happened to be the same as the one inside the VM? Is this something specific to your underlying driver?
3) figure out where host_uuid lives
I posted a message a while ago. It's not anywhere right now and should probably be provided via libvirt.conf overriding dmidecode, but dmidecode being used if its output is valid and resorting to a temporary (between libvirt restart) host uuid if everything fails...
diff --git a/src/util/macvtap.c b/src/util/macvtap.c index 1f8dd29..7803bc8 100644 --- a/src/util/macvtap.c +++ b/src/util/macvtap.c @@ -85,14 +85,14 @@ static void nlClose(int fd) * buffer will be returned. */ static -int nlComm(struct nlmsghdr *nlmsg, +int nlComm(struct nlmsghdr *nlmsg, int nlgroups, char **respbuf, int *respbuflen) { int rc = 0; struct sockaddr_nl nladdr = { .nl_family = AF_NETLINK, .nl_pid = 0, - .nl_groups = 0, + .nl_groups = nlgroups,
I think if only you use this function, nlgroups can be 0 here. I would use different code to use multicast for talking to lldpad.
}; int rcvChunkSize = 1024; // expecting less than that int rcvoffset = 0; @@ -192,6 +192,27 @@ nlAppend(struct nlmsghdr *nlm, int totlen, const void *data, int datalen) return pos; }
+#define NLMSG_TAIL(nmsg) \ + ((struct rtattr *) (((char *) (nmsg)) +
NLMSG_ALIGN((nmsg)->nlmsg_len)))
+ +static struct rtattr *nlNest(struct nlmsghdr *nlm, int totlen, int type) +{ + struct rtattr *nest = NLMSG_TAIL(nlm); + + if (nlm->nlmsg_len + NLMSG_ALIGN(sizeof(*nest)) > totlen) + return NULL; + nest->rta_type = type; + nest->rta_len = RTA_LENGTH(0); + nlm->nlmsg_len += sizeof(*nest); + nlAlign(nlm); + return nest; +} + +static int nlNestEnd(struct nlmsghdr *nlm, struct rtattr *nest) +{ + nest->rta_len = (char *)NLMSG_TAIL(nlm) - (char *)nest; + return nlm->nlmsg_len; +}
At some point in the future someone may want to change all this netlink stuff to use libnl -- except for maybe in the case of the multicast message that I am sending in one of my patches and using select with a timeout to wait for a response from (future) lldpad where I am not sure whether an equivalent function exists in libnl...
+#if 0 +static int +get_host_uuid(char *host_uuid, int len)
this is not active code ... skipping it
+ +static int +portProfileIdMcast(const char *linkdev, + const char request, + const char *profileid, + const unsigned short *instance_uuid, + const unsigned short *host_uuid) +{ + struct rtattr *port_self; + char nlmsgbuf[512]; + struct nlmsghdr *nlm = (struct nlmsghdr *)nlmsgbuf, *resp; + struct ifinfomsg infomsg = { .ifi_family = AF_UNSPEC }; + char rtattbuf[256]; + struct rtattr *rta; + char *recvbuf = NULL; + int recvbuflen; + struct nlmsgerr *err; + int rc = 0; + + memset(&nlmsgbuf, 0, sizeof(nlmsgbuf)); + nlInit(nlm, NLM_F_REQUEST, RTM_SETLINK); + + if (!nlAppend(nlm, sizeof(nlmsgbuf), &infomsg, sizeof(infomsg))) + goto buffer_too_small; + + rta = rtattrCreate(rtattbuf, sizeof(rtattbuf), IFLA_IFNAME, + linkdev, strlen(linkdev) + 1); + if (!rta) + goto buffer_too_small; + + if (!nlAppend(nlm, sizeof(nlmsgbuf), rtattbuf, rta->rta_len)) + goto buffer_too_small; + + port_self = nlNest(nlm, sizeof(nlmsgbuf), IFLA_PORT_SELF); + if (!port_self) + return -1; + + switch (request) { + case PORT_REQUEST_ASSOCIATE: + if (profileid && strlen(profileid)) { + rta = rtattrCreate(rtattbuf, sizeof(rtattbuf), + IFLA_PORT_PROFILE, + profileid, strlen(profileid) + 1); + if (!rta) + goto buffer_too_small; + + if (!nlAppend(nlm, sizeof(nlmsgbuf), rtattbuf, rta->rta_len)) + goto buffer_too_small; + } + if (instance_uuid) { + rta = rtattrCreate(rtattbuf, sizeof(rtattbuf), + IFLA_PORT_INSTANCE_UUID, + instance_uuid, PORT_UUID_MAX); + if (!rta) + goto buffer_too_small; + + if (!nlAppend(nlm, sizeof(nlmsgbuf), rtattbuf, rta->rta_len)) + goto buffer_too_small; + } + if (host_uuid) { + rta = rtattrCreate(rtattbuf, sizeof(rtattbuf), + IFLA_PORT_HOST_UUID, + host_uuid, PORT_UUID_MAX); + if (!rta) + goto buffer_too_small; + + if (!nlAppend(nlm, sizeof(nlmsgbuf), rtattbuf, rta->rta_len)) + goto buffer_too_small; + } + break; + case PORT_REQUEST_DISASSOCIATE: + break; + } + + rta = rtattrCreate(rtattbuf, sizeof(rtattbuf), IFLA_PORT_REQUEST, + &request, 1); + if (!rta) + goto buffer_too_small; + + if (!nlAppend(nlm, sizeof(nlmsgbuf), rtattbuf, rta->rta_len)) + goto buffer_too_small; + + nlNestEnd(nlm, port_self); + + if (nlComm(nlm, RTNLGRP_LINK, &recvbuf, &recvbuflen) < 0) + return -1; +
I know that it doesn't work like this in the netlink multicast case where libvirt will receive an error from the kernel and hopefully some time later a response from lldpad. Still it would be nice if we could use common code here for both technologies, but the unicast vs. multicast receiver code makes it quite different. Have a look at my patches. Stefan

On Thu, May 20, 2010 at 04:52:44PM -0400, Stefan Berger wrote:
Scott Feldman <scofeldm@cisco.com> wrote on 05/20/2010 03:22:12 PM:
Next steps for V3, etc:
1) merge with Stefan's latest patch, as much as possible
Yes. Try to make it a patch on top of my V5 patch...
2) assign VM device mac addr to eth and set macvtap mac addr to random
Hm, why is this necessary? I have tested the macvtap device and always set it to the address that libvirt assigned to it and that happened to be the same as the one inside the VM? Is this something specific to your underlying driver?
3) figure out where host_uuid lives
I posted a message a while ago. It's not anywhere right now and should probably be provided via libvirt.conf overriding dmidecode, but dmidecode being used if its output is valid and resorting to a temporary (between libvirt restart) host uuid if everything fails...
Agreed, we should likely put it in /etc/libvirt/libvirtd.conf and have a convenience function in src/util/uuid.h called virGetHostUUID() so we avoid putting this logic in the drivers. Daniel -- |: Red Hat, Engineering, London -o- http://people.redhat.com/berrange/ :| |: http://libvirt.org -o- http://virt-manager.org -o- http://deltacloud.org :| |: http://autobuild.org -o- http://search.cpan.org/~danberr/ :| |: GnuPG: 7D3B9505 -o- F3C9 553F A1DA 4AC2 5648 23C1 B3DF F742 7D3B 9505 :|
participants (3)
-
Daniel P. Berrange
-
Scott Feldman
-
Stefan Berger