Thanks for your comments.
But I have several questions, please see below...
On 2018-08-29 at 19:43, Erik Skultety wrote:
On Thu, Aug 23, 2018 at 12:15:08PM +0800, Shi Lei wrote:
> This patch adds virNetlinkNewLink for simplifying virNetDevMacVLanCreate
> and virNetDevBridgeCreate.
>
> Signed-off-by: Shi Lei <shi_lei(a)massclouds.com>
> ---
There are multiple changes happening in this patch so it will need to be split
into several patches, see below...
Okay. Then the v2 series should be like:
[patch v2 0/4]: ... cover ...
[patch v2 1/4]: Add wrapper macros to make code more readable
[patch v2 2/4]: Introduce virNetlinkNewLink
[patch v2 3/4]: Replace the implementation of virNetDevBridgeCreate
[patch v2 4/4]: Replace the implementation of virNetDevMacVLanCreate
> src/libvirt_private.syms | 1 +
> src/util/virnetdevbridge.c | 73 +++--------------------
> src/util/virnetdevmacvlan.c | 137 ++++++++++++++------------------------------
> src/util/virnetlink.c | 104 +++++++++++++++++++++++++++++++++
> src/util/virnetlink.h | 8 +++
> 5 files changed, 164 insertions(+), 159 deletions(-)
>
> diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms
> index 47ea35f..23931ba 100644
> --- a/src/libvirt_private.syms
> +++ b/src/libvirt_private.syms
> @@ -2439,6 +2439,7 @@ virNetlinkEventServiceStop;
> virNetlinkEventServiceStopAll;
> virNetlinkGetErrorCode;
> virNetlinkGetNeighbor;
> +virNetlinkNewLink;
> virNetlinkShutdown;
> virNetlinkStartup;
>
> diff --git a/src/util/virnetdevbridge.c b/src/util/virnetdevbridge.c
> index bc377b5..1f5b37e 100644
> --- a/src/util/virnetdevbridge.c
> +++ b/src/util/virnetdevbridge.c
> @@ -417,77 +417,22 @@ virNetDevBridgeCreate(const char *brname)
> {
> /* use a netlink RTM_NEWLINK message to create the bridge */
> const char *type = "bridge";
> - struct nlmsgerr *err;
> - struct ifinfomsg ifinfo = { .ifi_family = AF_UNSPEC };
> - unsigned int recvbuflen;
> - struct nlattr *linkinfo;
> - VIR_AUTOPTR(virNetlinkMsg) nl_msg = NULL;
> - VIR_AUTOFREE(struct nlmsghdr *) resp = NULL;
> + int error = 0;
>
> - nl_msg = nlmsg_alloc_simple(RTM_NEWLINK,
> - NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL);
> - if (!nl_msg) {
> - virReportOOMError();
> - return -1;
> - }
> -
> - if (nlmsg_append(nl_msg, &ifinfo, sizeof(ifinfo), NLMSG_ALIGNTO) < 0)
> - goto buffer_too_small;
> - if (nla_put(nl_msg, IFLA_IFNAME, strlen(brname)+1, brname) < 0)
> - goto buffer_too_small;
> - if (!(linkinfo = nla_nest_start(nl_msg, IFLA_LINKINFO)))
> - goto buffer_too_small;
> - if (nla_put(nl_msg, IFLA_INFO_KIND, strlen(type), type) < 0)
> - goto buffer_too_small;
> - nla_nest_end(nl_msg, linkinfo);
> -
> - if (virNetlinkCommand(nl_msg, &resp, &recvbuflen, 0, 0,
> - NETLINK_ROUTE, 0) < 0) {
> - return -1;
> - }
> -
> - if (recvbuflen < NLMSG_LENGTH(0) || resp == NULL)
> - goto malformed_resp;
> -
> - switch (resp->nlmsg_type) {
> - case NLMSG_ERROR:
> - err = (struct nlmsgerr *)NLMSG_DATA(resp);
> - if (resp->nlmsg_len < NLMSG_LENGTH(sizeof(*err)))
> - goto malformed_resp;
> -
> - if (err->error < 0) {
> + if (virNetlinkNewLink(NULL, brname, NULL, type, NULL, NULL, &error) < 0)
{
> # if defined(HAVE_STRUCT_IFREQ) && defined(SIOCBRADDBR)
> - if (err->error == -EOPNOTSUPP) {
> - /* fallback to ioctl if netlink doesn't support creating
> - * bridges
> - */
> - return virNetDevBridgeCreateWithIoctl(brname);
> - }
> -# endif
> -
> - virReportSystemError(-err->error,
> - _("error creating bridge interface %s"),
> - brname);
> - return -1;
> + if (error == -EOPNOTSUPP) {
> + /* fallback to ioctl if netlink doesn't support creating bridges */
> + return virNetDevBridgeCreateWithIoctl(brname);
> }
> - break;
> +# endif
> + virReportSystemError(-error, _("error creating bridge interface
%s"),
> + brname);
>
> - case NLMSG_DONE:
> - break;
> - default:
> - goto malformed_resp;
> + return -1;
> }
>
> return 0;
> -
> - malformed_resp:
> - virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
> - _("malformed netlink response message"));
> - return -1;
> - buffer_too_small:
> - virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
> - _("allocated netlink buffer is too small"));
> - return -1;
> }
>
>
> diff --git a/src/util/virnetdevmacvlan.c b/src/util/virnetdevmacvlan.c
> index 2035b1f..1629add 100644
> --- a/src/util/virnetdevmacvlan.c
> +++ b/src/util/virnetdevmacvlan.c
> @@ -283,6 +283,37 @@ virNetDevMacVLanReleaseName(const char *name)
> }
>
>
> +static int
> +virNetDevMacVLanCreateCallback(struct nl_msg *nl_msg, const void *opaque)
> +{
> + const uint32_t *mode = (const uint32_t *) opaque;
> + if (!nl_msg || !opaque) {
> + virReportError(VIR_ERR_INTERNAL_ERROR,
> + _("nl_msg %p or opaque %p is NULL"),
> + nl_msg, opaque);
> + return -1;
> + }
> +
> + if (*mode > 0) {
> + struct nlattr *info_data;
> + if (!(info_data = nla_nest_start(nl_msg, IFLA_INFO_DATA)))
> + goto buffer_too_small;
> +
> + if (nla_put(nl_msg, IFLA_MACVLAN_MODE, sizeof(*mode), mode) < 0)
> + goto buffer_too_small;
> +
> + nla_nest_end(nl_msg, info_data);
> + }
> +
> + return 0;
> +
> + buffer_too_small:
> + virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
> + _("allocated netlink buffer is too small"));
> + return -1;
> +}
Having a callback just to add a nested data into nl_msg doesn't feel like the
right approach, I'd expect a callback to do more specific stuff, this should be
special-cased in virNetlinkNewLink.
I looked through the code for ip-link in iproute2 which could be a good example
for netlink. And I found that the differences of attributes of various net-devices are
confined to IFLA_INFO_DATA or IFLA_INFO_SLAVE_DATA(this is not used in libvirt).
The code fragment in iplink.c:
if (ulinep && !strcmp(ulinep, "_slave")) {
... ...
lu = get_link_slave_kind(slavebuf);
iflatype = IFLA_INFO_SLAVE_DATA;
} else {
=> lu = get_link_kind(type); /* Get pointer to the driver for this
type */
=> iflatype = IFLA_INFO_DATA; /* NEST will be for IFLA_INFO_DATA */
}
if (lu && argc) {
=> struct rtattr *data = addattr_nest(&req.n, sizeof(req), iflatype); /*
NEST BEGIN */
****************************
if (lu->parse_opt &&
lu->parse_opt(lu, argc, argv, &req.n))
return -1;
****************************
^The lu points to various types of net-drivers(e.g. macvlan/bridge/vxlan/vlan
...)
which pack all special self-attributes in their parse_opt callback
=> addattr_nest_end(&req.n, data); /* NEST END */
So I think that it's reasonable for us to have a callback to handle it just as
iproute2
since iproute2 could almost create all the types of net-devices.
> +
> +
> /**
> * virNetDevMacVLanCreate:
> *
> @@ -307,113 +338,29 @@ virNetDevMacVLanCreate(const char *ifname,
> uint32_t macvlan_mode,
> int *retry)
> {
^This replacement would be the last patch in the series.
Okay.
> - int rc = -1;
> - struct nlmsgerr *err;
> - struct ifinfomsg ifinfo = { .ifi_family = AF_UNSPEC };
> int ifindex;
> - unsigned int recvbuflen;
> - struct nl_msg *nl_msg;
> - struct nlattr *linkinfo, *info_data;
> - char macstr[VIR_MAC_STRING_BUFLEN];
> - VIR_AUTOFREE(struct nlmsghdr *) resp = NULL;
> -
> - if (virNetDevGetIndex(srcdev, &ifindex) < 0)
> - return -1;
> -
> + int error = 0;
> *retry = 0;
>
> - nl_msg = nlmsg_alloc_simple(RTM_NEWLINK,
> - NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL);
> - if (!nl_msg) {
> - virReportOOMError();
> + if (virNetDevGetIndex(srcdev, &ifindex) < 0)
> return -1;
> - }
> -
> - if (nlmsg_append(nl_msg, &ifinfo, sizeof(ifinfo), NLMSG_ALIGNTO) < 0)
> - goto buffer_too_small;
> -
> - if (nla_put_u32(nl_msg, IFLA_LINK, ifindex) < 0)
> - goto buffer_too_small;
> -
> - if (nla_put(nl_msg, IFLA_ADDRESS, VIR_MAC_BUFLEN, macaddress) < 0)
> - goto buffer_too_small;
> -
> - if (ifname &&
> - nla_put(nl_msg, IFLA_IFNAME, strlen(ifname)+1, ifname) < 0)
> - goto buffer_too_small;
>
> - if (!(linkinfo = nla_nest_start(nl_msg, IFLA_LINKINFO)))
> - goto buffer_too_small;
> -
> - if (nla_put(nl_msg, IFLA_INFO_KIND, strlen(type), type) < 0)
> - goto buffer_too_small;
> -
> - if (macvlan_mode > 0) {
> - if (!(info_data = nla_nest_start(nl_msg, IFLA_INFO_DATA)))
> - goto buffer_too_small;
> -
> - if (nla_put(nl_msg, IFLA_MACVLAN_MODE, sizeof(macvlan_mode),
> - &macvlan_mode) < 0)
> - goto buffer_too_small;
> -
> - nla_nest_end(nl_msg, info_data);
> - }
> -
> - nla_nest_end(nl_msg, linkinfo);
> -
> - if (virNetlinkCommand(nl_msg, &resp, &recvbuflen, 0, 0,
> - NETLINK_ROUTE, 0) < 0) {
> - goto cleanup;
> - }
> -
> - if (recvbuflen < NLMSG_LENGTH(0) || resp == NULL)
> - goto malformed_resp;
> -
> - switch (resp->nlmsg_type) {
> - case NLMSG_ERROR:
> - err = (struct nlmsgerr *)NLMSG_DATA(resp);
> - if (resp->nlmsg_len < NLMSG_LENGTH(sizeof(*err)))
> - goto malformed_resp;
> -
> - switch (err->error) {
> -
> - case 0:
> - break;
> -
> - case -EEXIST:
> + if (virNetlinkNewLink(&ifindex, ifname, macaddress, type,
> + virNetDevMacVLanCreateCallback, &macvlan_mode,
> + &error) < 0) {
> + char macstr[VIR_MAC_STRING_BUFLEN];
> + if (error == -EEXIST)
> *retry = 1;
> - goto cleanup;
> -
> - default:
> - virReportSystemError(-err->error,
> + else
> + virReportSystemError(-error,
> _("error creating %s interface %s@%s
(%s)"),
> type, ifname, srcdev,
> virMacAddrFormat(macaddress, macstr));
> - goto cleanup;
> - }
> - break;
> -
> - case NLMSG_DONE:
> - break;
>
> - default:
> - goto malformed_resp;
> + return -1;
> }
>
> - rc = 0;
> - cleanup:
> - nlmsg_free(nl_msg);
> - return rc;
> -
> - malformed_resp:
> - virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
> - _("malformed netlink response message"));
> - goto cleanup;
> -
> - buffer_too_small:
> - virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
> - _("allocated netlink buffer is too small"));
> - goto cleanup;
> + return 0;
> }
>
> /**
> diff --git a/src/util/virnetlink.c b/src/util/virnetlink.c
> index 8f06446..817e347 100644
> --- a/src/util/virnetlink.c
> +++ b/src/util/virnetlink.c
> @@ -489,6 +489,110 @@ virNetlinkDumpLink(const char *ifname, int ifindex,
>
>
> /**
> + * virNetlinkNewLink:
Introduction of this function should come in a separate patch before you start
replacing the existing ones.
Okay.
> + *
> + * @ifindex: The index for the 'link' device
> + * @ifname: The name of the link
> + * @mac: The MAC address of the device
Most of ^these attributes could be packed into a virNetlinkNewLinkData
structure (or something similarly named). You'll have to test check for
VIR_DOMAIN_NET_TYPE_DIRECT and VIR_NETDEV_MACVLAN_MODE_VEPA according to my
comments below inside <comment_block>...
I have studied your comments inside <comment_block>, but I still don't know how
to
test check for VIR_DOMAIN_NET_TYPE_DIRECT and VIR_NETDEV_MACVLAN_MODE_VEPA.
Could you explain it for me? :-)
> + * @type: The type of device, i.e., "bridge", "macvtap",
"macvlan"
> + * @cb: The callback for filling in IFLA_INFO_DATA for this type
I don't think callback will be necessary
I think that it's reasonable to use callback here.
Another mode might be:
1) pre-alloc buffer and fill in IFLA_INFO_DATA
2) pass the buffer pointer as a param into virNetlinkNewLink which will
embed it into nlmsg
3) free the buffer
I think that this mode might be a bit more complicated and it's hard to find good
references.
> + * @opaque: opaque for the callback
> + * @error: for retrieving error code
^see below for my comment on return value...
Okay.
> + *
> + * Create a network "link" (aka interface aka device) with the given
> + * args. This works for many different types of network devices,
> + * including macvtap and bridges.
> + *
> + * Returns 0 on success, -1 on fatal error.
Since this is a nice libvirt wrapper around the netlink communication
machinery, I think that returning -<netlink_error> in case of an error is
reasonable. Then, each of the callers of this API can handle the return values
as they please.
Okay. I will use netlink error as return-value.
But how to handle -1?
1) always return -1 for *other* failures
2) return -ENOMEM for OutofMemory; return -ENOBUFS for buffer_too_small; ...
I should take which one?
> + */
> +int
> +virNetlinkNewLink(const int *ifindex,
> + const char *ifname,
> + const virMacAddr *mac,
> + const char *type,
> + virNetlinkNewLinkCallback cb,
> + const void *opaque,
> + int *error)
> +{
> + struct nlmsgerr *err;
> + struct nlattr *linkinfo;
> + unsigned int buflen;
> + struct ifinfomsg ifinfo = { .ifi_family = AF_UNSPEC };
> + VIR_AUTOPTR(virNetlinkMsg) nl_msg = NULL;
> + VIR_AUTOFREE(struct nlmsghdr *) resp = NULL;
> +
> + *error = 0;
> +
> + nl_msg = nlmsg_alloc_simple(RTM_NEWLINK,
> + NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL);
> + if (!nl_msg) {
> + virReportOOMError();
> + return -1;
> + }
> +
> + if (nlmsg_append(nl_msg, &ifinfo, sizeof(ifinfo), NLMSG_ALIGNTO) < 0)
> + goto buffer_too_small;
> +
No offence. Let me try talking about nla_put* in comment_block.
<comment_block>
> + if (ifindex && nla_put_u32(nl_msg, IFLA_LINK, *ifindex) < 0)
pre-existing, but is there a particular reason why ^this has to be u32??
prototype of nla_put_u32:
int nla_put_u32(struct nl_msg *msg, int attrtype, uint32_t value);
It is used for adding 32 bit integer into msg.
We got ifindex by calling ioctl with SIOCGIFINDEX and this ifindex >= 0.
So I think that it's right to use nla_put_u32.
> + goto buffer_too_small;
> +
> + if (mac && nla_put(nl_msg, IFLA_ADDRESS, VIR_MAC_BUFLEN, mac) < 0)
> + goto buffer_too_small;
Similarly, is there a reason why ^this is not necessary to be u32?
prototype of nla_put:
int nla_put(struct nl_msg *msg, int attrtype, int datalen, const void *data);
It means put a sequence of octets into msg. It could be used for string and
other packed struct.
The type of mac is struct virMacAddr which size is VIR_MAC_BUFLEN.
So I think it's right.
> +
> + if (ifname && nla_put(nl_msg, IFLA_IFNAME, strlen(ifname)+1, ifname)
< 0)
> + goto buffer_too_small;
> +
> + if (!(linkinfo = nla_nest_start(nl_msg, IFLA_LINKINFO)))
Since we're already touching the code, I would find it more readable if ^this
was written as linkinfo = nla_nest_start. The reason for that is that it's not
immediately visible where the matching counterpart to "nla_nest_end" is.
Okay.
> + goto buffer_too_small;
> +
> + if (nla_put(nl_msg, IFLA_INFO_KIND, strlen(type), type) < 0)
I'm puzzled why ^this doesn't need strlen(foo)+1 instead of plain strlen(foo)
This code is same as iproute2. There's no problem here.
In net/core/rtnetlink.c of kernel (source v4.4.0):
nla_strlcpy(kind, linkinfo[IFLA_INFO_KIND], sizeof(kind));
and the implementation of nla_strlcpy:
size_t nla_strlcpy(char *dst, const struct nlattr *nla, size_t dstsize)
{
size_t srclen = nla_len(nla); /* whatever it is strlen(foo)+1 or strlen(foo) */
char *src = nla_data(nla);
=> if (srclen > 0 && src[srclen - 1] == '\0')
=> srclen--;
But I think it should always be strlen(foo)+1 for consistency of code.
> + goto buffer_too_small;
</comment_block>
We could actually create a wrapper macro around nla_put which would make the
block (begin-end) tiny more readable. See my notes above, as I have further
questions...Now, the macros would again be a separate patch to make the change
more gradual. Depending on the comments to my questions above, the macro would
either take the size of the type and always call nla_put or a specialized
nla_put_<string|u32|whatever> function. I'm CC'ing Laine to help answering
my
questions inside <comment_block>.
Erik
I think that the usage of nla_put* is clear now.
So I don't know how to create wrapper macros.
Could you give me some instructions for it?
Regards,
Shi Lei
> +
> + if (cb && cb(nl_msg, opaque) < 0)
> + return -1;
> +
> + nla_nest_end(nl_msg, linkinfo);
> +
> + if (virNetlinkCommand(nl_msg, &resp, &buflen, 0, 0, NETLINK_ROUTE, 0)
< 0)
> + return -1;
> +
> + if (buflen < NLMSG_LENGTH(0) || resp == NULL)
> + goto malformed_resp;
> +
> + switch (resp->nlmsg_type) {
> + case NLMSG_ERROR:
> + err = (struct nlmsgerr *)NLMSG_DATA(resp);
> + if (resp->nlmsg_len < NLMSG_LENGTH(sizeof(*err)))
> + goto malformed_resp;
> +
> + if (err->error < 0) {
> + *error = err->error;
> + return -1;
> + }
> + break;
> +
> + case NLMSG_DONE:
> + break;
> +
> + default:
> + goto malformed_resp;
> + }
> +
> + return 0;
> +
> + malformed_resp:
> + virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
> + _("malformed netlink response message"));
> + return -1;
> +
> + buffer_too_small:
> + virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
> + _("allocated netlink buffer is too small"));
> + return -1;
> +}
> +
> +
> +/**
> * virNetlinkDelLink:
> *
> * @ifname: Name of the link
> diff --git a/src/util/virnetlink.h b/src/util/virnetlink.h
> index 1e1e616..195c7bb 100644
> --- a/src/util/virnetlink.h
> +++ b/src/util/virnetlink.h
> @@ -65,6 +65,14 @@ int virNetlinkDumpCommand(struct nl_msg *nl_msg,
> unsigned int protocol, unsigned int groups,
> void *opaque);
>
> +typedef int (*virNetlinkNewLinkCallback)(struct nl_msg *nl_msg,
> + const void *opaque);
> +
> +int virNetlinkNewLink(const int *ifindex, const char *ifname,
> + const virMacAddr *macaddress, const char *type,
> + virNetlinkNewLinkCallback cb, const void *opaque,
> + int *error);
> +
> typedef int (*virNetlinkDelLinkFallback)(const char *ifname);
>
> int virNetlinkDelLink(const char *ifname, virNetlinkDelLinkFallback fallback);
> --
> 2.7.4
>
>
> --
> libvir-list mailing list
> libvir-list(a)redhat.com
>
https://www.redhat.com/mailman/listinfo/libvir-list