[libvirt] [libvirt PATCH] Port-profile ID support using IFLA_VF_PORT_PROFILE netlink msg

From: Scott Feldman <scofeldm@cisco.com> This fleshes out the port profile ID proof-of-concept patch posted earlier by David Allan, referenced here: https://www.redhat.com/archives/libvir-list/2010-March/msg01401.html It uses the new IFLA_VF_PORT_PROFILE netlink msg to set/unset the port- profile for the virtual switch port backing the VM device. The new netlink msg is being discussed on the netdev kernel mailing list here: http://marc.info/?l=linux-netdev&m=127312092712543&w=2 http://marc.info/?l=linux-netdev&m=127312093412556&w=2 IFLA_VF_PORT_PROFILE is sent using RTM_SETLINK, and retrieved using RTM_GETLINK. IFLA_VF_PORT_PROFILE is sent using netlink multicast send with RTNLGRP_LINK so the receiver of the msg can be in user-space or kernel-space. The device XML is: <interface type='direct'> <source dev='eth2' mode='private' profileid='dc_test'/> <mac address='00:16:3e:1a:b3:4b'/> </interface> The port-profile ID msg is sent to source dev. Tested with Cisco 10G Ethernet NIC using port-profiles defined in Cisco's Unified Computing System Management software and above referenced kernel patches. 
Signed-off-by: Scott Feldman <scofeldm@cisco.com> Signed-off-by: Roopa Prabhu<roprabhu@cisco.com> --- src/conf/domain_conf.c | 13 +++ src/conf/domain_conf.h | 1 src/libvirt_macvtap.syms | 2 src/qemu/qemu_conf.c | 7 ++ src/qemu/qemu_driver.c | 10 ++ src/util/macvtap.c | 200 +++++++++++++++++++++++++++++++++++++++++++++- src/util/macvtap.h | 6 + 7 files changed, 233 insertions(+), 6 deletions(-) diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c index 3e45f79..968076f 100644 --- a/src/conf/domain_conf.c +++ b/src/conf/domain_conf.c @@ -484,6 +484,7 @@ void virDomainNetDefFree(virDomainNetDefPtr def) case VIR_DOMAIN_NET_TYPE_DIRECT: VIR_FREE(def->data.direct.linkdev); + VIR_FREE(def->data.direct.profileid); break; case VIR_DOMAIN_NET_TYPE_USER: @@ -1831,6 +1832,7 @@ virDomainNetDefParseXML(virCapsPtr caps, char *internal = NULL; char *devaddr = NULL; char *mode = NULL; + char *profileid = NULL; virNWFilterHashTablePtr filterparams = NULL; if (VIR_ALLOC(def) < 0) { @@ -1873,6 +1875,7 @@ virDomainNetDefParseXML(virCapsPtr caps, xmlStrEqual(cur->name, BAD_CAST "source")) { dev = virXMLPropString(cur, "dev"); mode = virXMLPropString(cur, "mode"); + profileid = virXMLPropString(cur, "profileid"); } else if ((network == NULL) && ((def->type == VIR_DOMAIN_NET_TYPE_SERVER) || (def->type == VIR_DOMAIN_NET_TYPE_CLIENT) || @@ -2049,6 +2052,11 @@ virDomainNetDefParseXML(virCapsPtr caps, } else def->data.direct.mode = VIR_DOMAIN_NETDEV_MACVTAP_MODE_VEPA; + if (profileid != NULL) { + def->data.direct.profileid = profileid; + profileid = NULL; + } + def->data.direct.linkdev = dev; dev = NULL; @@ -2114,6 +2122,7 @@ cleanup: VIR_FREE(internal); VIR_FREE(devaddr); VIR_FREE(mode); + VIR_FREE(profileid); virNWFilterHashTableFree(filterparams); return def; @@ -5140,6 +5149,10 @@ virDomainNetDefFormat(virBufferPtr buf, def->data.direct.linkdev); virBufferVSprintf(buf, " mode='%s'", virDomainNetdevMacvtapTypeToString(def->data.direct.mode)); + if (def->data.direct.profileid) { + 
virBufferEscapeString(buf, " profileid='%s'", + def->data.direct.profileid); + } virBufferAddLit(buf, "/>\n"); break; diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h index fadc8bd..30ebf07 100644 --- a/src/conf/domain_conf.h +++ b/src/conf/domain_conf.h @@ -290,6 +290,7 @@ struct _virDomainNetDef { struct { char *linkdev; int mode; + char *profileid; } direct; } data; char *ifname; diff --git a/src/libvirt_macvtap.syms b/src/libvirt_macvtap.syms index ae229a0..9d4652e 100644 --- a/src/libvirt_macvtap.syms +++ b/src/libvirt_macvtap.syms @@ -3,3 +3,5 @@ # macvtap.h openMacvtapTap; delMacvtap; +setPortProfileId; +unsetPortProfileId; diff --git a/src/qemu/qemu_conf.c b/src/qemu/qemu_conf.c index 5fa8c0a..aff6f28 100644 --- a/src/qemu/qemu_conf.c +++ b/src/qemu/qemu_conf.c @@ -1479,6 +1479,11 @@ qemudPhysIfaceConnect(virConnectPtr conn, net->model && STREQ(net->model, "virtio")) vnet_hdr = 1; + if (!STREQ(net->data.direct.profileid, "")) + setPortProfileId(net->data.direct.linkdev, + net->data.direct.profileid, + net->mac); + rc = openMacvtapTap(net->ifname, net->mac, linkdev, brmode, &res_ifname, vnet_hdr); if (rc >= 0) { @@ -1501,6 +1506,8 @@ qemudPhysIfaceConnect(virConnectPtr conn, close(rc); rc = -1; delMacvtap(net->ifname); + if (!STREQ(net->data.direct.profileid, "")) + unsetPortProfileId(net->data.direct.linkdev); } } } diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index bb1079e..6ea37d4 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -3586,8 +3586,11 @@ static void qemudShutdownVMDaemon(struct qemud_driver *driver, for (i = 0; i < def->nnets; i++) { virDomainNetDefPtr net = def->nets[i]; if (net->type == VIR_DOMAIN_NET_TYPE_DIRECT) { - if (net->ifname) + if (net->ifname) { delMacvtap(net->ifname); + if (!STREQ(net->data.direct.profileid, "")) + unsetPortProfileId(net->data.direct.linkdev); + } } } #endif @@ -8147,8 +8150,11 @@ qemudDomainDetachNetDevice(struct qemud_driver *driver, #if WITH_MACVTAP if 
(detach->type == VIR_DOMAIN_NET_TYPE_DIRECT) { - if (detach->ifname) + if (detach->ifname) { delMacvtap(detach->ifname); + if (!STREQ(detach->data.direct.profileid, "")) + unsetPortProfileId(detach->data.direct.linkdev); + } } #endif diff --git a/src/util/macvtap.c b/src/util/macvtap.c index 5d129fd..825cf30 100644 --- a/src/util/macvtap.c +++ b/src/util/macvtap.c @@ -85,14 +85,14 @@ static void nlClose(int fd) * buffer will be returned. */ static -int nlComm(struct nlmsghdr *nlmsg, +int nlComm(struct nlmsghdr *nlmsg, int nlgroups, char **respbuf, int *respbuflen) { int rc = 0; struct sockaddr_nl nladdr = { .nl_family = AF_NETLINK, .nl_pid = 0, - .nl_groups = 0, + .nl_groups = nlgroups, }; int rcvChunkSize = 1024; // expecting less than that int rcvoffset = 0; @@ -287,7 +287,7 @@ link_add(const char *type, li->rta_len = (char *)nlm + nlm->nlmsg_len - (char *)li; - if (nlComm(nlm, &recvbuf, &recvbuflen) < 0) + if (nlComm(nlm, 0, &recvbuf, &recvbuflen) < 0) return -1; if (recvbuflen < NLMSG_LENGTH(0) || recvbuf == NULL) @@ -371,7 +371,7 @@ link_del(const char *name) if (!nlAppend(nlm, sizeof(nlmsgbuf), rtattbuf, rta->rta_len)) goto buffer_too_small; - if (nlComm(nlm, &recvbuf, &recvbuflen) < 0) + if (nlComm(nlm, 0, &recvbuf, &recvbuflen) < 0) return -1; if (recvbuflen < NLMSG_LENGTH(0) || recvbuf == NULL) @@ -568,6 +568,198 @@ configMacvtapTap(int tapfd, int vnet_hdr) return 0; } +static int +get_host_uuid(char *host_uuid, int len) +{ + const char *dmidecodearg[] = { "dmidecode", "-s", "system-uuid", NULL }; + const char *const dmidecodeenv[] = { "LC_ALL=C", NULL }; + char *binary, *newline; + int dmidecodestdout = -1; + int ret = -1; + pid_t child; + + binary = virFindFileInPath(dmidecodearg[0]); + if (binary == NULL || access(binary, X_OK) != 0) { + VIR_FREE(binary); + return -1; + } + dmidecodearg[0] = binary; + + if (virExec(dmidecodearg, dmidecodeenv, NULL, + &child, -1, &dmidecodestdout, NULL, VIR_EXEC_CLEAR_CAPS) < 0) { + ret = -1; + goto cleanup; + } + + 
if((ret = saferead(dmidecodestdout, host_uuid, len)) <= 0) { + ret = -1; + goto cleanup; + } + host_uuid[ret-1] = '\0'; + + /* strip newline */ + newline = strrchr(host_uuid, '\n'); + if (newline) + *newline = '\0'; + + ret = 0; + +cleanup: + VIR_FREE(binary); + + if (close(dmidecodestdout) < 0) + ret = -1; + + return ret; +} + + +static int sendPortProfileMulticastMsg(const char *linkdev, + struct ifla_vf_port_profile *ivp) +{ + int rc = 0; + char nlmsgbuf[512]; + struct nlmsghdr *nlm = (struct nlmsghdr *)nlmsgbuf, *resp; + char *recvbuf = NULL; + struct nlmsgerr *err; + char rtattbuf[256]; + struct rtattr *rta; + int recvbuflen; + int ifindex; + struct ifinfomsg i = { .ifi_family = AF_UNSPEC }; + + if (ifaceGetIndex(true, linkdev, &ifindex) != 0) + return -1; + + memset(&nlmsgbuf, 0, sizeof(nlmsgbuf)); + nlInit(nlm, NLM_F_REQUEST, RTM_SETLINK); + + if (!nlAppend(nlm, sizeof(nlmsgbuf), &i, sizeof(i))) + goto buffer_too_small; + + rta = rtattrCreate(rtattbuf, sizeof(rtattbuf), IFLA_IFNAME, + linkdev, strlen(linkdev) + 1); + if (!rta) + goto buffer_too_small; + + if (!nlAppend(nlm, sizeof(nlmsgbuf), rtattbuf, rta->rta_len)) + goto buffer_too_small; + + rta = rtattrCreate(rtattbuf, sizeof(rtattbuf), IFLA_VF_PORT_PROFILE, + ivp, sizeof(*ivp)); + if (!rta) + goto buffer_too_small; + + if (!nlAppend(nlm, sizeof(nlmsgbuf), rtattbuf, rta->rta_len)) + goto buffer_too_small; + + if (nlComm(nlm, RTNLGRP_LINK, &recvbuf, &recvbuflen) < 0) + return -1; + + if (recvbuflen < NLMSG_LENGTH(0) || recvbuf == NULL) + goto malformed_resp; + + resp = (struct nlmsghdr *)recvbuf; + + switch (resp->nlmsg_type) { + case NLMSG_ERROR: + err = (struct nlmsgerr *)NLMSG_DATA(resp); + if (resp->nlmsg_len < NLMSG_LENGTH(sizeof(*err))) + goto malformed_resp; + + switch (-err->error) { + case 0: + break; + + default: + virReportSystemError(-err->error, + _("error setting port profile on %s"), + linkdev); + rc = -1; + } + break; + case NLMSG_DONE: + break; + + default: + goto malformed_resp; + } + + 
VIR_FREE(recvbuf); + + return rc; + +malformed_resp: + macvtapError(VIR_ERR_INTERNAL_ERROR, "%s", + _("malformed netlink response message")); + VIR_FREE(recvbuf); + return -1; + +buffer_too_small: + macvtapError(VIR_ERR_INTERNAL_ERROR, "%s", + _("internal buffer is too small")); + + return -1; +} + + +int unsetPortProfileId(const char *linkdev) +{ + int rc = 0; + struct ifla_vf_port_profile ivp; + + memset(&ivp, 0, sizeof(struct ifla_vf_port_profile)); + ivp.vf = -1; + + if(!(rc = sendPortProfileMulticastMsg(linkdev, &ivp))) { + rc = ifaceDown(linkdev); + if (rc != 0) { + virReportSystemError(errno, + ("cannot 'down' interface %s"), + linkdev); + //Should we error out ? + //rc = -1; + } + } + + return rc; +} + +int setPortProfileId(const char *linkdev, + const char *profileid, + unsigned char *macaddress) +{ + int rc = 0; + struct ifla_vf_port_profile ivp; + char host_uuid[IFLA_VF_UUID_MAX] = "\0"; + + if (!profileid) + return -EINVAL; + + memset(&ivp, 0, sizeof(struct ifla_vf_port_profile)); + ivp.vf = -1; + strncpy((char *)ivp.port_profile, profileid, sizeof(ivp.port_profile)); + ivp.port_profile[sizeof(ivp.port_profile)-1] = '\0'; + memcpy(ivp.mac, macaddress, sizeof(ivp.mac)); + get_host_uuid(host_uuid, IFLA_VF_UUID_MAX); + if (strlen(host_uuid)) { + strncpy((char *)ivp.host_uuid, host_uuid, sizeof(ivp.host_uuid)); + ivp.port_profile[sizeof(ivp.port_profile)-1] = '\0'; + } + + if(!(rc = sendPortProfileMulticastMsg(linkdev, &ivp))) { + rc = ifaceUp(linkdev); + if (rc != 0) { + virReportSystemError(errno, + ("cannot 'up' interface %s"), + linkdev); + // Should we error out of here ? 
+ //rc = -1; + } + } + + return rc; +} /** * openMacvtapTap: diff --git a/src/util/macvtap.h b/src/util/macvtap.h index 5d4ea5e..7f58a13 100644 --- a/src/util/macvtap.h +++ b/src/util/macvtap.h @@ -37,6 +37,12 @@ int openMacvtapTap(const char *ifname, void delMacvtap(const char *ifname); +int setPortProfileId(const char *linkdev, + const char *profileid, + unsigned char *macaddress); + +int unsetPortProfileId(const char *linkdev); + # endif /* WITH_MACVTAP */ # define MACVTAP_MODE_PRIVATE_STR "private"

libvir-list-bounces@redhat.com wrote on 05/08/2010 03:05:29 AM:
From: Scott Feldman <scofeldm@cisco.com>
This fleshes out the port profile ID proof-of-concept patch posted earlier by David Allan, referenced here:
https://www.redhat.com/archives/libvir-list/2010-March/msg01401.html
It uses the new IFLA_VF_PORT_PROFILE netlink msg to set/unset the port- profile for the virtual switch port backing the VM device. The new netlink msg is being discussed on the netdev kernel mailing list here:
http://marc.info/?l=linux-netdev&m=127312092712543&w=2 http://marc.info/?l=linux-netdev&m=127312093412556&w=2
IFLA_VF_PORT_PROFILE is sent using RTM_SETLINK, and retrieved using RTM_GETLINK. IFLA_VF_PORT_PROFILE is sent using netlink multicast send with RTNLGRP_LINK so the receiver of the msg can be in user-space or kernel-space.
The device XML is:
<interface type='direct'> <source dev='eth2' mode='private' profileid='dc_test'/> <mac address='00:16:3e:1a:b3:4b'/> </interface>
The port-profile ID msg is sent to source dev.
Great. Now we have two competing implementations where the underlying technology is supposed to be VEPA in both but the parameters to set it up are vastly different -- if you compare against Vivek's post yesterday. Above you are providing a profile id in form of a string. Is that string above just a dummy example or a real-world parameter that can actually be passed? Vivek posted a message yesterday showing now 4 different parameters... are these somehow encoded in the profileid in your case or you simply don't need them? I see you are getting the host UUID via dmidecode, so there are still 3 parameters left. Anyway, I let you guys figure that out. I suppose in your case we would use the external daemon to derive eth0 from eth0.100 where the macvtap would be connected on along with the vlan id in eth0.100. So the functions I posted yesterday may need to go into that code then.
diff --git a/src/qemu/qemu_conf.c b/src/qemu/qemu_conf.c index 5fa8c0a..aff6f28 100644 --- a/src/qemu/qemu_conf.c +++ b/src/qemu/qemu_conf.c @@ -1479,6 +1479,11 @@ qemudPhysIfaceConnect(virConnectPtr conn, net->model && STREQ(net->model, "virtio")) vnet_hdr = 1;
+ if (!STREQ(net->data.direct.profileid, "")) + setPortProfileId(net->data.direct.linkdev, + net->data.direct.profileid, + net->mac); +
Since setting up a port profile seems to be a step tightly connected to opening the macvtap I'd push this into the openMacvtapTap function.
rc = openMacvtapTap(net->ifname, net->mac, linkdev, brmode, &res_ifname, vnet_hdr); if (rc >= 0) { @@ -1501,6 +1506,8 @@ qemudPhysIfaceConnect(virConnectPtr conn, close(rc); rc = -1; delMacvtap(net->ifname); + if (!STREQ(net->data.direct.profileid, "")) + unsetPortProfileId(net->data.direct.linkdev); }
Same here, push it into the delMacvtap function.
} } diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index bb1079e..6ea37d4 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -3586,8 +3586,11 @@ static void qemudShutdownVMDaemon(struct qemud_driver *driver, for (i = 0; i < def->nnets; i++) { virDomainNetDefPtr net = def->nets[i]; if (net->type == VIR_DOMAIN_NET_TYPE_DIRECT) { - if (net->ifname) + if (net->ifname) { delMacvtap(net->ifname); + if (!STREQ(net->data.direct.profileid, "")) + unsetPortProfileId(net->data.direct.linkdev); + }
Same.
} } #endif @@ -8147,8 +8150,11 @@ qemudDomainDetachNetDevice(struct qemud_driver
*driver,
#if WITH_MACVTAP if (detach->type == VIR_DOMAIN_NET_TYPE_DIRECT) { - if (detach->ifname) + if (detach->ifname) { delMacvtap(detach->ifname); + if (!STREQ(detach->data.direct.profileid, "")) + unsetPortProfileId(detach->data.direct.linkdev); + }
Same.
+ +int setPortProfileId(const char *linkdev, + const char *profileid, + unsigned char *macaddress) +{ + int rc = 0; + struct ifla_vf_port_profile ivp; + char host_uuid[IFLA_VF_UUID_MAX] = "\0"; + + if (!profileid) + return -EINVAL; + + memset(&ivp, 0, sizeof(struct ifla_vf_port_profile)); + ivp.vf = -1; + strncpy((char *)ivp.port_profile, profileid, sizeof(ivp.port_profile));
use the libvirt function to copy string
+ ivp.port_profile[sizeof(ivp.port_profile)-1] = '\0'; + memcpy(ivp.mac, macaddress, sizeof(ivp.mac)); + get_host_uuid(host_uuid, IFLA_VF_UUID_MAX);
check for error
+ if (strlen(host_uuid)) { + strncpy((char *)ivp.host_uuid, host_uuid, sizeof(ivp.host_uuid));
use libvirt function to copy string
+ ivp.port_profile[sizeof(ivp.port_profile)-1] = '\0'; + } + + if(!(rc = sendPortProfileMulticastMsg(linkdev, &ivp))) { + rc = ifaceUp(linkdev); + if (rc != 0) { + virReportSystemError(errno, + ("cannot 'up' interface %s"), + linkdev); + // Should we error out of here ? + //rc = -1; + } + } + + return rc; +}
/** * openMacvtapTap: diff --git a/src/util/macvtap.h b/src/util/macvtap.h index 5d4ea5e..7f58a13 100644 --- a/src/util/macvtap.h +++ b/src/util/macvtap.h @@ -37,6 +37,12 @@ int openMacvtapTap(const char *ifname,
void delMacvtap(const char *ifname);
+int setPortProfileId(const char *linkdev, + const char *profileid, + unsigned char *macaddress); + +int unsetPortProfileId(const char *linkdev); +
Remove after pushing the function into open* and del* functions. Regards, Stefan
# endif /* WITH_MACVTAP */
# define MACVTAP_MODE_PRIVATE_STR "private"
-- libvir-list mailing list libvir-list@redhat.com https://www.redhat.com/mailman/listinfo/libvir-list

On 5/8/10 12:12 PM, "Stefan Berger" <stefanb@us.ibm.com> wrote:
From: Scott Feldman <scofeldm@cisco.com> The device XML is:
<interface type='direct'> <source dev='eth2' mode='private' profileid='dc_test'/> <mac address='00:16:3e:1a:b3:4b'/> </interface>
The port-profile ID msg is sent to source dev.
Great. Now we have two competing implementations where the underlying technology is supposed to be VEPA in both but the parameters to set it up are vastly different -- if you compare against Vivek's post yesterday. Above you are providing a profile id in form of a string. Is that string above just a dummy example or a real-world parameter that can actually be passed?
It was an example string. The RTM_SETLINK IFLA_VF_PORT_PROFILE msg type uses u8 array for port_profile, with the idea that it can hold a string (as in the above example) or some encoded bytes.
Vivek posted a message yesterday showing now 4 different parameters... are these somehow encoded in the profileid in your case or you simply don't need them?
Yes, somehow encoded. There was discussion on the kernel netdev mailing list on how to merge the VDP tuple into the port-profile, but I haven't seen the final form. Can you help push that discussion along? Our case doesn't need the VDP tuple encoded in port-profile. We need a string to identify the desired port-profile. I think we can agree on these goals: 1) single RTM_SETLINK netlink msg type for set/unset of port-profile 2) single method in libvirt to send port-profile using RTM_SETLINK 3) single representation in XML I'm not sure if 3) is possible given the different encodings of port-profile. Can the VDP tuple be represented as a string, e.g. "1.2345.6"?
I see you are getting the host UUID via dmidecode, so there are still 3 parameters left. Anyway, I let you guys figure that out.
Ideally, we'd like to have host UUID, guest UUID, and even name of guest port, if available. Any extra information passed with the port-profile helps mgmt software organize the virtual ports.
I suppose in your case we would use the external daemon to derive eth0 from eth0.100 where the macvtap would be connected on along with the vlan id in eth0.100. So the functions I posted yesterday may need to go into that code then.
In our case, the src device driver in the kernel receives the RTM_SETLINK msg directly; there is no external daemon on the host. Our driver will handle the RTM_SETLINK msg directly to make sure the virtual port is set up accordingly. Note the RTM_SETLINK msg is sent with multicast RTNLGRP_LINK so either a host daemon or a kernel netdev driver may receive the port-profile msg.
diff --git a/src/qemu/qemu_conf.c b/src/qemu/qemu_conf.c index 5fa8c0a..aff6f28 100644 --- a/src/qemu/qemu_conf.c +++ b/src/qemu/qemu_conf.c @@ -1479,6 +1479,11 @@ qemudPhysIfaceConnect(virConnectPtr conn, net->model && STREQ(net->model, "virtio")) vnet_hdr = 1;
+ if (!STREQ(net->data.direct.profileid, "")) + setPortProfileId(net->data.direct.linkdev, + net->data.direct.profileid, + net->mac); +
Since setting up a port profile seems to be a step tightly connected to opening the macvtap I'd push this into the openMacvtapTap function.
I don't think port-profile should be tightly coupled with macvtap. For example, port-profile would be applicable for vhost-net where the emu device sits right on top of the kernel netdev. There is no macvtap in that case. Here are some of the examples use-cases for port-profile that don't involve macvtap: PCI device passthru vhost-net software tagging switch over simple nics that passthru tags I'm sure there will be other ways invented to plumb the virtual device to the guest interface.
use the libvirt function to copy string
check for error
use libvirt function to copy string
I'll resend with these fixes. Thanks for the review. -scott

chrisw, libvir-list, libvir-list-bounces, Vivek Kashyap
On 5/8/10 12:12 PM, "Stefan Berger" <stefanb@us.ibm.com> wrote:
From: Scott Feldman <scofeldm@cisco.com> The device XML is:
<interface type='direct'> <source dev='eth2' mode='private' profileid='dc_test'/> <mac address='00:16:3e:1a:b3:4b'/> </interface>
The port-profile ID msg is sent to source dev.
Great. Now we have two competing implementations where the underlying technology is supposed to be VEPA in both but the parameters to set it up are vastly different -- if you compare against Vivek's post yesterday. Above you are providing a profile id in form of a string. Is that string above just a dummy example or a real-world parameter
Scott Feldman <scofeldm@cisco.com> wrote on 05/08/2010 07:28:11 PM: that
can actually be passed?
It was an example string. The RTM_SETLINK IFLA_VF_PORT_PROFILE msg type uses u8 array for port_profile, with the idea that it can hold a string (as in the above example) or some encoded bytes.
Vivek posted a message yesterday showing now 4 different parameters... are these somehow encoded in the profileid in your case or you simply don't need them?
Yes, somehow encoded. There was discussion on the kernel netdev mailing list on how to merge the VDP tuple into the port-profile, but I haven't seen the final form. Can you help push that discussion along? Our case doesn't need the VDP tuple encoded in port-profile. We need a string to identify the desired port-profile.
I thought this was the work of the standards committee ... If I understand the situation correctly, then there is a setup protocol that needs to be run with the switch to setup parameters for clients' network streams on a port. That protocol needs a couple of parameters. In your case you seem to need the profile id and then the hosts' uuid. What else do you need? Do you need the manager ID in that protocol + type id + type id version as proposed yesterday? Or is the protocol not 100% defined, yet?
VSI Manager ID 1 octet VSI Type ID 3 octets VSI Type ID Version 1 octet VSI Instance ID 16 octets <-- taken care of via
dmidecode
I think we can agree on these goals:
1) single RTM_SETLINK netlink msg type for set/unset of port-profile 2) single method in libvirt to send port-profile using RTM_SETLINK 3) single representation in XML
I'm not sure if 3) is possible given the different encodings of port-profile. Can the VDP tuple be represented as a string, e.g. "1.2345.6"?
This is fine by me, but we could also split it up into different fields. I assume that different vendors' switches will all somehow need to see the same parameters so they can run the protocol? Virtual machines will also be able to migrate to hosts that are connected to different vendors' switches and then will always present the same set of parameters since they migrate along. So I hope this is completely independent of what vendor's switch is connected to a host.
I see you are getting the host UUID via dmidecode, so there are still 3 parameters left. Anyway, I let you
guys
figure that out.
Ideally, we'd like to have host UUID, guest UUID, and even name of guest port, if available. Any extra information passed with the port-profile
helps mgmt software organize the virtual ports.
I suppose in your case we would use the external daemon to derive eth0 from eth0.100 where the macvtap would be connected on along with the vlan id in eth0.100. So
The guest UUID is available in libvirt and can be passed through to where it is needed but I didn't see you sending it via the netlink message so far. What is a 'guest port'? The port the cable from the host is connected to the switch (port)? the
functions I posted yesterday may need to go into that code then.
In our case, the src device driver in the kernel receives the RTM_SETLINK msg directly; there is no external daemon on the host. Our driver will handle the RTM_SETLINK msg directly to make sure the virtual port is set up accordingly.
Yeah, I guess you can figure out in the kernel whether to contact the hardware for eth{0, 1, or 2} to run the protocol on.
Note the RTM_SETLINK msg is sent with multicast RTNLGRP_LINK so either a host daemon or a kernel netdev driver may receive the port-profile msg.
diff --git a/src/qemu/qemu_conf.c b/src/qemu/qemu_conf.c index 5fa8c0a..aff6f28 100644 --- a/src/qemu/qemu_conf.c +++ b/src/qemu/qemu_conf.c @@ -1479,6 +1479,11 @@ qemudPhysIfaceConnect(virConnectPtr conn, net->model && STREQ(net->model, "virtio")) vnet_hdr = 1;
+ if (!STREQ(net->data.direct.profileid, "")) + setPortProfileId(net->data.direct.linkdev, + net->data.direct.profileid, + net->mac); +
Since setting up a port profile seems to be a step tightly connected to opening the macvtap I'd push this into the openMacvtapTap function.
I don't think port-profile should be tightly coupled with macvtap. For example, port-profile would be applicable for vhost-net where the emu
device
sits right on top of the kernel netdev. There is no macvtap in that case.
Though it seemed a pattern that when a macvtap was opened or closed that the function to set or unset the port profile was always called. So instead of calling the close macvtap function + the unset function for the profile in 3 different places, just call the close macvtap function in 3 places and have the close macvtap call the unset function for the profile. If other components can also use the profile related functions, we can still export them, but at the moment we don't have the necessary parameters available if anything else than the 'direct' type of interface was to be used.
Here are some of the examples use-cases for port-profile that don't involve macvtap:
PCI device passthru vhost-net software tagging switch over simple nics that passthru tags
I'm sure there will be other ways invented to plumb the virtual device to the guest interface.
Yes, as said, the setPortProfile can then become {vsi|vepa}SetPortProfile, and be exported for other functions to use.
use the libvirt function to copy string
check for error
use libvirt function to copy string
I'll resend with these fixes. Thanks for the review.
Yes, and you are using #define's that aren't available for many, yet, so you may need to add #ifdef's around certain code parts and fail the function with error related to unavailable kernel functionality if #ifndef. Also we'll probably need an rpm dependency on the dmidecode package. Stefan
-scott

<cut...>
VSI Manager ID 1 octet VSI Type ID 3 octets VSI Type ID Version 1 octet VSI Instance ID 16 octets <-- taken care of via dmidecode
The 'VSI Instance ID' is associated with a virtual interface. Therefore, a guest might have multiple VSI-instance IDs - each associated with a separate virtual NIC.
I think we can agree on these goals:
1) single RTM_SETLINK netlink msg type for set/unset of port-profile 2) single method in libvirt to send port-profile using RTM_SETLINK 3) single representation in XML
I'm not sure if 3) is possible given the different encodings of port-profile. Can the VDP tuple be represented as a string, e.g. "1.2345.6"?
This is fine by me, but we could also split it up into different fields.
Would splitting into different fields make it easier for the recipients to only access the needed fields rather than parse a complex string? If so, splitting it makes it easier.
I assume that different vendors' switches will all somehow need to see the same parameters so they can run the protocol? Virtual machines will also be able to migrate to hosts that are connected to different vendors' switches and then will always present the same set of parameters since they migrate along. So I hope this is completely independent of what vendor's switch is connected to a host.
I see you are getting the host UUID via dmidecode, so there are still 3 parameters left. Anyway, I let you
guys
figure that out.
Ideally, we'd like to have host UUID, guest UUID, and even name of guest port, if available. Any extra information passed with the port-profile
Is guest port == virtual interface? And the name as seen in guest?? thanks Vivek

kashyapv@linux.vnet.ibm.com wrote on 05/10/2010 03:30:10 AM:
<cut...>
VSI Manager ID 1 octet VSI Type ID 3 octets VSI Type ID Version 1 octet VSI Instance ID 16 octets <-- taken care of
via
dmidecode
The 'VSI Instance ID' is associated with a virtual interface. Therefore, a guest might have multiple VSI-instance IDs - each associated with a separate virtual NIC.
Alright, then this becomes the 3rd UUID (besides guest and host UUID that Scott seems to want) to initiate the setup protocol with the switch. So the list of parameters above is necessary to be provided from the 'outside'. I am wondering what the first three parameters are related to. Do they reflect specifics of a particular attached switch ? Should this information migrate with a VM to another switch and possibly cause the setup protocol to fail because that switch requires a different manager, type or type version ID for example? Not that this would then make things easier at all (to code), but at least it would provide a correct long term solution if this information actually did not go into VM metadata but was a host's local switch configuration data that could be different for every attached Ethernet interface. This information would have to then go into some local configuration file that libvirt can read when needed. Stefan

On Mon, 10 May 2010, Stefan Berger wrote:
kashyapv@linux.vnet.ibm.com wrote on 05/10/2010 03:30:10 AM:
<cut...>
VSI Manager ID 1 octet VSI Type ID 3 octets VSI Type ID Version 1 octet VSI Instance ID 16 octets <-- taken care of via dmidecode
The 'VSI Instance ID' is associated with a virtual interface. Therefore, a guest might have multiple VSI-instance IDs - each associated with a
separate
virtual NIC.
Alright, then this becomes the 3rd UUID (besides guest and host UUID that Scott seems to want) to initiate the setup protocol with the switch. So the list of parameters above is necessary to be provided from the 'outside'.
Yes.
I am wondering what the first three parameters are related to. Do they reflect specifics of a particular attached switch ? Should this information migrate
Yes, these are keys to the database that the switch consults to impose the profiles (filter rules, qos etc.). The 'VSI Mgr Id' is the specific database, and VSI_type_ID is the specific profile to be applied. It is expected that the fabric management entity may create different versions of the profiles. Therefore, the version may be used in retrieving the exact profile to be applied. The KVM host is really telling the switch the parameters to the actual profile to be retrieved and associated to a particular virtual network interface (mac/vlan) instance.
with a VM to another switch and possibly cause the setup protocol to fail because that switch requires a different manager, type or type version ID for example?
The switch will, using other link level protocols, access the database and download the profile. The switch is not tied to the database but is only told which database to download the profiles from. Vivek
Not that this would then make things easier at all (to code), but at least it would provide a correct long term solution if this information actually did not go into VM metadata but was a host's local switch configuration data that could be different for every attached Ethernet interface. This information would have to then go into some local configuration file that libvirt can read when needed.
Stefan

On Sat, May 08, 2010 at 10:13:36PM -0400, Stefan Berger wrote:
Scott Feldman <scofeldm@cisco.com> wrote on 05/08/2010 07:28:11 PM:
VSI Manager ID 1 octet VSI Type ID 3 octets VSI Type ID Version 1 octet VSI Instance ID 16 octets <-- taken care of via dmidecode
The code can't rely on using dmidecode I'm afraid. Far too many manufacturers / machines have SMBIOS UUID field filled with complete garbage. eg my main server shows # dmidecode |grep -i uuid UUID: Not Settable So if we need a host UUID, I think we need to be able to either set it in the XML, or have it set in a per host config for the QEMU driver in libvirt. Daniel -- |: Red Hat, Engineering, London -o- http://people.redhat.com/berrange/ :| |: http://libvirt.org -o- http://virt-manager.org -o- http://deltacloud.org :| |: http://autobuild.org -o- http://search.cpan.org/~danberr/ :| |: GnuPG: 7D3B9505 -o- F3C9 553F A1DA 4AC2 5648 23C1 B3DF F742 7D3B 9505 :|

"Daniel P. Berrange" <berrange@redhat.com> wrote on 05/12/2010 08:48:58 AM:
On Sat, May 08, 2010 at 10:13:36PM -0400, Stefan Berger wrote:
Scott Feldman <scofeldm@cisco.com> wrote on 05/08/2010 07:28:11 PM:
VSI Manager ID 1 octet VSI Type ID 3 octets VSI Type ID Version 1 octet VSI Instance ID 16 octets <-- taken care of via
dimdecode
The code can't rely on using dmidecode I'm afraid. Far too many manufacturers / machines have SMBIOS UUID field filled with complete garbage. eg my main server shows
# dmidecode |grep -i uuid UUID: Not Settable
So if we need a host UUID, I think we need to be able to either set it in
the XML, or have it set in a per host config for the QEMU driver
in libvirt. I wouldn't make it QEMU specific. It should probably go into libvirt.conf, but then libvirt.conf would need to be modified on every machine when installed. How should the code react if no valid UUID is found in the config file? Create a temporary one that changes with every restart of libvirt? Also dmidecode should probably be used on those machine where it returns a valid UUID. Once that would be done, we could show the UUID via 'virsh capabilities' so management software can find it. If above doesn't sound right, then providing it via the XML in vsi is probably the way to go. Stefan
Daniel -- |: Red Hat, Engineering, London -o-
http://people.redhat.com/berrange/:|
|: http://libvirt.org -o- http://virt-manager.org -o- http://deltacloud.org:| |: http://autobuild.org -o- http://search.cpan.org/~danberr/:| |: GnuPG: 7D3B9505 -o- F3C9 553F A1DA 4AC2 5648 23C1 B3DF F742 7D3B 9505 :|

On Wed, May 12, 2010 at 10:06:34AM -0400, Stefan Berger wrote:
"Daniel P. Berrange" <berrange@redhat.com> wrote on 05/12/2010 08:48:58 AM:
On Sat, May 08, 2010 at 10:13:36PM -0400, Stefan Berger wrote:
Scott Feldman <scofeldm@cisco.com> wrote on 05/08/2010 07:28:11 PM:
VSI Manager ID 1 octet VSI Type ID 3 octets VSI Type ID Version 1 octet VSI Instance ID 16 octets <-- taken care of via
dimdecode
The code can't rely on using dmidecode I'm afraid. Far too many manufacturers / machines have SMBIOS UUID field filled with complete garbage. eg my main server shows
# dmidecode |grep -i uuid UUID: Not Settable
So if we need a host UUID, I think we need to be able to either set it in
the XML, or have it set in a per host config for the QEMU driver
in libvirt.
I wouldn't make it QEMU specific. It should probably go into libvirt.conf, but then libvirt.conf would need to be modified on every machine when installed. How should the code react if no valid UUID is found in the config file? Create a temporary one that changes with every restart of libvirt? Also dmidecode should probably be used on those machine where it returns a valid UUID.
How do we decide that dmidecode is showing a valid UUID though. Is a UUID of all 0's, all 1's or all F's valid ?
Once that would be done, we could show the UUID via 'virsh capabilities' so management software can find it.
Agree, it would be useful to expose a host UUID in the capabilities. Daniel -- |: Red Hat, Engineering, London -o- http://people.redhat.com/berrange/ :| |: http://libvirt.org -o- http://virt-manager.org -o- http://deltacloud.org :| |: http://autobuild.org -o- http://search.cpan.org/~danberr/ :| |: GnuPG: 7D3B9505 -o- F3C9 553F A1DA 4AC2 5648 23C1 B3DF F742 7D3B 9505 :|

"Daniel P. Berrange" <berrange@redhat.com> wrote on 05/12/2010 10:09:41 AM:
I wouldn't make it QEMU specific. It should probably go into libvirt.conf,
but then libvirt.conf would need to be modified on every machine when installed. How should the code react if no valid UUID is found in the config file? Create a temporary one that changes with every restart of libvirt? Also dmidecode should probably be used on those machine where it returns a valid UUID.
How do we decide that dmidecode is showing a valid UUID though. Is a UUID of all 0's, all 1's or all F's valid ?
With high probability, 32 hex chars that are the same can probably be considered 'wrong'. Some details on how one could handle the different cases of valid/invalid UUIDs:

libvirtd.conf | dmidecode    | handling
--------------+--------------+---------------------------------------------------------
valid UUID    | valid UUID   | libvirtd.conf UUID overrides dmidecode's UUID
invalid UUID  | invalid UUID | generate a temporary one; new one after libvirtd restart
valid UUID    | invalid UUID | use valid UUID
invalid UUID  | valid UUID   | use valid UUID

Stefan

On Sat, 2010-05-08 at 00:05 -0700, Scott Feldman wrote:
From: Scott Feldman <scofeldm@cisco.com>
This fleshes out the port profile ID proof-of-concept patch posted earlier by David Allan, referenced here:
https://www.redhat.com/archives/libvir-list/2010-March/msg01401.html
It uses the new IFLA_VF_PORT_PROFILE netlink msg to set/unset the port- profile for the virtual switch port backing the VM device. The new netlink msg is being discussed on the netdev kernel mailing list here:
http://marc.info/?l=linux-netdev&m=127312092712543&w=2 http://marc.info/?l=linux-netdev&m=127312093412556&w=2
IFLA_VF_PORT_PROFILE is sent using RTM_SETLINK, and retrieved using RTM_GETLINK. IFLA_VF_PORT_PROFILE is sent using netlink multicast send with RTNLGRP_LINK so the receiver of the msg can be in user-space or kernel-space.
I tried the latest libvirt (from git) to compile with this patch: util/macvtap.c:618: warning: 'struct ifla_vf_port_profile' declared inside parameter list util/macvtap.c:618: warning: its scope is only this definition or declaration, which is probably not what you want util/macvtap.c: In function 'sendPortProfileMulticastMsg': util/macvtap.c:648: error: 'IFLA_VF_PORT_PROFILE' undeclared (first use in this function) util/macvtap.c:648: error: (Each undeclared identifier is reported only once util/macvtap.c:648: error: for each function it appears in.) util/macvtap.c:649: error: dereferencing pointer to incomplete type util/macvtap.c: In function 'unsetPortProfileId': util/macvtap.c:709: error: storage size of 'ivp' isn't known util/macvtap.c:711: error: invalid application of 'sizeof' to incomplete type 'struct ifla_vf_port_profile' util/macvtap.c:709: warning: unused variable 'ivp' [-Wunused-variable] util/macvtap.c: In function 'setPortProfileId': util/macvtap.c:733: error: storage size of 'ivp' isn't known util/macvtap.c:734: error: 'IFLA_VF_UUID_MAX' undeclared (first use in this function) util/macvtap.c:739: error: invalid application of 'sizeof' to incomplete type 'struct ifla_vf_port_profile' util/macvtap.c:734: warning: unused variable 'host_uuid' [-Wunused-variable] util/macvtap.c:733: warning: unused variable 'ivp' [-Wunused-variable] make[3]: *** [libvirt_util_la-macvtap.lo] Error 1 As of 2.6.34-rc7, 'IFLA_VF_PORT_PROFILE' is not yet in the kernel. Any estimate when this might change? -- Best regards, Gerhard Stenzel, ----------------------------------------------------------------------------------------------------------------------------------- IBM Deutschland Research & Development GmbH Vorsitzender des Aufsichtsrats: Martin Jetter Geschäftsführung: Dirk Wittkopp Sitz der Gesellschaft: Böblingen Registergericht: Amtsgericht Stuttgart, HRB 243294

* Gerhard Stenzel (gstenzel@linux.vnet.ibm.com) wrote:
As of 2.6.34-rc7, 'IFLA_VF_PORT_PROFILE' is not yet in the kernel. Any estimate when this might change?
Expected to change Real Soon Now. So while the upstream kernel netlink interface isn't 100% solidified, it's close enough that it's useful to discuss the libvirt internal implementation of carrying the port profile and pushing relevant data through the kernel's netlink interface. thanks, -chris

On 5/10/10 2:26 AM, "Gerhard Stenzel" <gstenzel@linux.vnet.ibm.com> wrote:
I tried the latest libvirt (from git) to compile with this patch:
util/macvtap.c:618: warning: 'struct ifla_vf_port_profile' declared inside parameter list util/macvtap.c:618: warning: its scope is only this definition or declaration, which is probably not what you want util/macvtap.c: In function 'sendPortProfileMulticastMsg': util/macvtap.c:648: error: 'IFLA_VF_PORT_PROFILE' undeclared (first use in this function) util/macvtap.c:648: error: (Each undeclared identifier is reported only once util/macvtap.c:648: error: for each function it appears in.) util/macvtap.c:649: error: dereferencing pointer to incomplete type util/macvtap.c: In function 'unsetPortProfileId': util/macvtap.c:709: error: storage size of 'ivp' isn't known util/macvtap.c:711: error: invalid application of 'sizeof' to incomplete type 'struct ifla_vf_port_profile' util/macvtap.c:709: warning: unused variable 'ivp' [-Wunused-variable] util/macvtap.c: In function 'setPortProfileId': util/macvtap.c:733: error: storage size of 'ivp' isn't known util/macvtap.c:734: error: 'IFLA_VF_UUID_MAX' undeclared (first use in this function) util/macvtap.c:739: error: invalid application of 'sizeof' to incomplete type 'struct ifla_vf_port_profile' util/macvtap.c:734: warning: unused variable 'host_uuid' [-Wunused-variable] util/macvtap.c:733: warning: unused variable 'ivp' [-Wunused-variable] make[3]: *** [libvirt_util_la-macvtap.lo] Error 1
As of 2.6.34-rc7, 'IFLA_VF_PORT_PROFILE' is not yet in the kernel. Any estimate when this might change?
Correct, IFLA_VF_PORT_PROFILE is not in the kernel yet. The kernel patch is being discussed on the kernel netdev mailing list. The most recent discussion is on ways to merge VDP and CDCP into what I've posted for IFLA_VF_PORT_PROFILE. I'll send out another libvirt patch once the kernel discussions are final. -scott

libvir-list-bounces@redhat.com wrote on 05/10/2010 02:35:49 PM:
Correct, IFLA_VF_PORT_PROFILE is not in the kernel yet. The kernel patch is being discussed on the kernel netdev mailing list. The most recent discussion is on ways to merge VDP and CDCP into what I've posted for IFLA_VF_PORT_PROFILE.
I'll send out another libvirt patch once the kernel discussions are final.
I suppose you will provide the libvirt patch only for what seems to be Cisco technology support. Now the slight differences in technology that we seem to try to support here are reflected in the parameters that go into the XML and end up in the netlink messages. Any way to consolidate that? I suppose VM migration between different switch vendors' switch isn't going to be that easy? Stefan

On 5/10/10 11:57 AM, "Stefan Berger" <stefanb@us.ibm.com> wrote:
libvir-list-bounces@redhat.com wrote on 05/10/2010 02:35:49 PM:
Correct, IFLA_VF_PORT_PROFILE is not in the kernel yet. The kernel patch is be discussed on the kernel netdev mailing list. The most recent discussion is on ways to merge VDP and CDCP into what I've posted for IFLA_VF_PORT_PROFILE.
I'll send out another libvirt patch once the kernel discussions are final.
I suppose you will provide the libvirt patch only for what seems to be Cisco technology support.
I can only test with our production equipment setup, so I'm hesitant to include additions to the patch for VDP/CDCP which I can't test.
Now the slight differences in technology that we seem to try to support here are reflected in the parameters that go into the XML and end up in the netlink messages. Any way to consolidate that?
It doesn't appear we'll be able to consolidate the parameters between the two technologies based on what I've seen from Arnd's latest patch on the kernel mailing list. The latest proposal is to define a single netlink msg that can handle two disjoint sets of parameters. If there is no way for further consolidation, it probably makes more sense to have two different netlink msgs, one for each parameter set. -scott

* Scott Feldman (scofeldm@cisco.com) wrote:
On 5/10/10 11:57 AM, "Stefan Berger" <stefanb@us.ibm.com> wrote:
libvir-list-bounces@redhat.com wrote on 05/10/2010 02:35:49 PM:
Correct, IFLA_VF_PORT_PROFILE is not in the kernel yet. The kernel patch is be discussed on the kernel netdev mailing list. The most recent discussion is on ways to merge VDP and CDCP into what I've posted for IFLA_VF_PORT_PROFILE.
I'll send out another libvirt patch once the kernel discussions are final.
I suppose you will provide the libvirt patch only for what seems to be Cisco technology support.
I can only test with our production equipment setup, so I'm hesitant to include additions to the patch for VDP/CDCP which I can't test.
Now the slight differences in technology that we seem to try to support here are reflected in the parameters that go into the XML and end up in the netlink messages. Any way to consolidate that?
I doesn't appear we'll be able to consolidate the parameters between the two technologies based on what I've seen from Arnd's latest patch on the kernel mailing list. The latest proposal is to define a single netlink msg that can handle two disjoint sets of parameters. If there is no way for further consolidation, it probably makes more senses to have two different netlink msgs, one for each parameter set.
Right, and would point to a flag to differentiate the two in xml too. thanks, -chris

On 5/10/10 12:14 PM, "Chris Wright" <chrisw@redhat.com> wrote:
* Scott Feldman (scofeldm@cisco.com) wrote:
Now the slight differences in technology that we seem to try to support here are reflected in the parameters that go into the XML and end up in the netlink messages. Any way to consolidate that?
I doesn't appear we'll be able to consolidate the parameters between the two technologies based on what I've seen from Arnd's latest patch on the kernel mailing list. The latest proposal is to define a single netlink msg that can handle two disjoint sets of parameters. If there is no way for further consolidation, it probably makes more senses to have two different netlink msgs, one for each parameter set.
Right, and would point to a flag to differentiate the two in xml too.
Here's a proposal to consolidate both technologies: 1) Use the IFLA_VF_PORT_PROFILE netlink msg I defined which has three basic sets of information: a) port-profile name b) mac addr of guest interface c) auxiliary info such as host UUID, client UUID, etc. 2) Define the XML to pass the above using mcast netlink msg. 3) Create a port-profile manager for LLDPAD to map port-profile to internal protocol settings. The mapping would resolve VDP parameters, for example, given a port-profile. Like: port-profile: "joes-garage" ---> VSI Manager ID: 15 VSI Type ID: 12345 VSI Type ID Ver: 1 VSI Instance ID would come from client UUID (or is it host UUID?). This proposal has these good qualities: 1) single netlink msg for kernel and user-space 2) single parameter set for sender's perspective (libvirt) 3) single XML spec 4) single code path in libvirt 5) (potential) cross-vendor-switch VM migration 6) user-friendly port-profile names 7) works for the following use-cases: a) firmware adapter that talks to external switch directly b) software switch that talks to external switch directly c) host daemon agent that talks to external switch indirectly The details of the port-profile mgr would need to be worked out. Is there local mapping per host or across hosts? Comments? -scott

Scott Feldman <scofeldm@cisco.com> wrote on 05/10/2010 03:53:45 PM:
Stefan Berger, Gerhard Stenzel, libvir-list, libvir-list-bounces
On 5/10/10 12:14 PM, "Chris Wright" <chrisw@redhat.com> wrote:
* Scott Feldman (scofeldm@cisco.com) wrote:
Now the slight differences in technology that we seem to try to support here are reflected in the parameters that go into the XML and end up in the netlink messages. Any way to consolidate that?
It doesn't appear we'll be able to consolidate the parameters between the two technologies based on what I've seen from Arnd's latest patch on the kernel mailing list. The latest proposal is to define a single netlink msg that can handle two disjoint sets of parameters. If there is no way for further consolidation, it probably makes more sense to have two different netlink msgs, one for each parameter set.
Right, and would point to a flag to differentiate the two in xml too.
Here's a proposal to consolidate both technologies:
1) Use the IFLA_VF_PORT_PROFILE netlink msg I defined which has three basic sets of information:
a) port-profile name b) mac addr of guest interface c) auxiliary info such as host UUID, client UUID, etc.
2) Define the XML to pass the above using mcast netlink msg.
3) Create a port-profile manager for LLDPAD to map port-profile to internal protocol settings. The mapping would resolve VDP parameters, for example, given a port-profile. Like:
port-profile: "joes-garage" ---> VSI Manager ID: 15 VSI Type ID: 12345 VSI Type ID Ver: 1
Sounds like this would require a whole new management API to get this mapping onto the machine and that probably isn't anywhere in place today...
VSI Instance ID would come from client UUID (or is it host UUID?).
Previously sounded to me like this would be a per interface UUID.
This proposal has these good qualities:
1) single netlink msg for kernel and user-space 2) single parameter set for sender's perspective (libvirt) 3) single XML spec 4) single code path in libvirt 5) (potential) cross-vendor-switch VM migration 6) user-friendly port-profile names 7) works for the following use-cases:
a) firmware adapter that talks to external switch directly b) software switch that talks to external switch directly c) host daemon agent that talks to external switch indirectly
The details of the port-profile mgr would need to be worked out. Is there local mapping per host or across hosts?
Comments?
802.1Qbg + 802.1Qbh => 802.1Qbi :-) Stefan
-scott

On 5/10/10 1:07 PM, "Stefan Berger" <stefanb@us.ibm.com> wrote:
Scott Feldman <scofeldm@cisco.com> wrote on 05/10/2010 03:53:45 PM:
3) Create a port-profile manager for LLDPAD to map port-profile to internal protocol settings. The mapping would resolve VDP parameters, for example, given a port-profile. Like:
port-profile: "joes-garage" ---> VSI Manager ID: 15 VSI Type ID: 12345 VSI Type ID Ver: 1
Sounds like this would require a whole new management API to get this mapping onto the machine and that probably isn't anywhere in place today...
Yes, that's what's required for step 3).
Comments?
802.1Qbg + 802.1Qbh => 802.1Qbi :-)
Ah, that reminds me I forgot one other good quality about my proposal: 8) Allows pre-standards (bg and bh) to change, but the implementation details are hidden from the netlink API. -scott

Scott Feldman <scofeldm@cisco.com> wrote on 05/10/2010 04:30:23 PM:
Chris Wright, Gerhard Stenzel, libvir-list, libvir-list-bounces
On 5/10/10 1:07 PM, "Stefan Berger" <stefanb@us.ibm.com> wrote:
Scott Feldman <scofeldm@cisco.com> wrote on 05/10/2010 03:53:45 PM:
3) Create a port-profile manager for LLDPAD to map port-profile to internal protocol settings. The mapping would resolve VDP parameters, for example, given a port-profile. Like:
port-profile: "joes-garage" ---> VSI Manager ID: 15 VSI Type ID: 12345 VSI Type ID Ver: 1
Sounds like this would require a whole new management API to get this mapping onto the machine and that probably isn't anywhere in place today...
Yes, that's what's required for step 3).
The other way around would be easier. If there was an otherwise-forbidden separator character we could have the 15/12345/1 and assume that if '/' is found we go for 802.1Qbg, 802.1Qbh otherwise. Stefan
Comments?
802.1Qbg + 802.1Qbh => 802.1Qbi :-)
Ah, that reminds me I forgot one other good quality about my proposal:
8) Allows pre-standards (bg and bh) to change, but the implementation details are hidden from the netlink API.
-scott

On Mon, 10 May 2010, Scott Feldman wrote:
On 5/10/10 12:14 PM, "Chris Wright" <chrisw@redhat.com> wrote:
* Scott Feldman (scofeldm@cisco.com) wrote:
Now the slight differences in technology that we seem to try to support here are reflected in the parameters that go into the XML and end up in the netlink messages. Any way to consolidate that?
I doesn't appear we'll be able to consolidate the parameters between the two technologies based on what I've seen from Arnd's latest patch on the kernel mailing list. The latest proposal is to define a single netlink msg that can handle two disjoint sets of parameters. If there is no way for further consolidation, it probably makes more senses to have two different netlink msgs, one for each parameter set.
Right, and would point to a flag to differentiate the two in xml too.
Here's a proposal to consolidate both technologies:
1) Use the IFLA_VF_PORT_PROFILE netlink msg I defined which has three basic sets of information:
a) port-profile name b) mac addr of guest interface c) auxiliary info such as host UUID, client UUID, etc.
2) Define the XML to pass the above using mcast netlink msg.
3) Create a port-profile manager for LLDPAD to map port-profile to internal protocol settings. The mapping would resolve VDP parameters, for example, given a port-profile. Like:
port-profile: "joes-garage" ---> VSI Manager ID: 15 VSI Type ID: 12345 VSI Type ID Ver: 1
This requires some way to manage the mappings which the recipient would need to know to retrieve.
VSI Instance ID would come from client UUID (or is it host UUID?).
The two proposals have the MAC address of the guest interface in common but the other parameters are different. The VSI_instance is equivalent to the virtual interface and so is not the same as the client UUID. It might be preferable to follow Stefan's suggestion and separate out the contents: <interface type='direct'> <source dev='static' mode='vepa'/> <model type='virtio'/> <vsi managerid='12' typeid='0x123456' typeidversion='1' instanceid='fa9b7fff-b0a0-4893-8e0e-beef4ff18f8f' /> <filterref filter='clean-traffic'/> </interface> <interface type='direct'> <source dev='static' mode='vepa'/> <model type='virtio'/> <vsi profileid='my_profile'/> </interface> The VirVSIType (802.1Qbg or 802.1Qbh) allows the recipient to filter out the content. That should allow for the elements below while keeping the same infrastructure (well - not the friendly name maybe). Vivek
This proposal has these good qualities:
1) single netlink msg for kernel and user-space 2) single parameter set for sender's perspective (libvirt) 3) single XML spec 4) single code path in libvirt 5) (potential) cross-vendor-switch VM migration 6) user-friendly port-profile names 7) works for the following use-cases:
a) firmware adapter that talks to external switch directly b) software switch that talks to external switch directly c) host daemon agent that talks to external switch indirectly
The details of the port-profile mgr would need to be worked out. Is there local mapping per host or across hosts?
Comments?
-scott
-- libvir-list mailing list libvir-list@redhat.com https://www.redhat.com/mailman/listinfo/libvir-list

Scott Feldman <scofeldm@cisco.com> wrote on 05/10/2010 03:11:22 PM:
chrisw, Gerhard Stenzel, libvir-list, libvir-list-bounces
On 5/10/10 11:57 AM, "Stefan Berger" <stefanb@us.ibm.com> wrote:
libvir-list-bounces@redhat.com wrote on 05/10/2010 02:35:49 PM:
Correct, IFLA_VF_PORT_PROFILE is not in the kernel yet. The kernel patch is being discussed on the kernel netdev mailing list. The most recent
discussion is on ways to merge VDP and CDCP into what I've posted for IFLA_VF_PORT_PROFILE.
I'll send out another libvirt patch once the kernel discussions are final.
I suppose you will provide the libvirt patch only for what seems to be Cisco technology support.
I can only test with our production equipment setup, so I'm hesitant to include additions to the patch for VDP/CDCP which I can't test.
Now the slight differences in technology that we seem to try to support here are reflected in the parameters that
go into the XML and end up in the netlink messages. Any way to consolidate that?
It doesn't appear we'll be able to consolidate the parameters between the two technologies based on what I've seen from Arnd's latest patch on the kernel mailing list. The latest proposal is to define a single netlink msg that
can handle two disjoint sets of parameters. If there is no way for further consolidation, it probably makes more sense to have two different netlink msgs, one for each parameter set.
Would it be possible to support these 2 XML : <interface type='direct'> <source dev='static' mode='vepa'/> <model type='virtio'/> <vsi managerid='12' typeid='0x123456' typeidversion='1' instanceid='fa9b7fff-b0a0-4893-8e0e-beef4ff18f8f' /> <filterref filter='clean-traffic'/> </interface> <interface type='direct'> <source dev='static' mode='vepa'/> <model type='virtio'/> <vsi profileid='my_profile'/> </interface> and profileid becomes mutually exclusive to the parameters shown above. The internal data structures (domain_conf.h) to use would be defined as this: enum virVSIType { VIR_VSI_INVALID, VIR_VSI_8021QBG, VIR_VSI_8021QBH, }; /* profile data for macvtap (VEPA) */ typedef struct _virVSIProfileDef virVSIProfileDef; typedef virVSIProfileDef *virVSIProfileDefPtr; struct _virVSIProfileDef { enum virVSIType vsiType; struct { uint8_t managerID; uint32_t typeID; // 24 bit valid uint8_t typeIDVersion; unsigned char instanceID[VIR_UUID_BUFLEN]; } vsi8021Qbg; #ifdef IFLA_VF_PORT_PROFILE_MAX struct { char profileID[IFLA_VF_PORT_PROFILE_MAX]; } vsi8021Qbh; #endif }; [...] struct { char *linkdev; int mode; virVSIProfileDef vsiProfile; } direct; Regards, Stefan
participants (6)
-
Chris Wright
-
Daniel P. Berrange
-
Gerhard Stenzel
-
Scott Feldman
-
Stefan Berger
-
Vivek Kashyap