On 2018-08-31 at 20:02, Erik Skultety wrote:
On Thu, Aug 30, 2018 at 02:27:05PM +0800, Shi Lei wrote:
> Thanks for your comments.
> But I have several questions, please see below...
>
> On 2018-08-29 at 19:43, Erik Skultety wrote:
> >On Thu, Aug 23, 2018 at 12:15:08PM +0800, Shi Lei wrote:
> >> This patch adds virNetlinkNewLink for simplifying virNetDevMacVLanCreate
> >> and virNetDevBridgeCreate.
> >>
> >> Signed-off-by: Shi Lei <shi_lei(a)massclouds.com>
> >> ---
> >
> >There are multiple changes happening in this patch so it will need to be split
> >into several patches, see below...
>
> Okay. Then the v2 series should be like:
> [patch v2 0/4]: ... cover ...
> [patch v2 1/4]: Add wrapper macros to make code more readable
> [patch v2 2/4]: Introduce virNetlinkNewLink
2/4 would come before 1/4
Okay.
> [patch v2 3/4]: Replace the implementation of virNetDevBridgeCreate
> [patch v2 4/4]: Replace the implementation of virNetDevMacVLanCreate
3/4 + 4/4 would be a single patch
Okay.
...
> >> +static int
> >> +virNetDevMacVLanCreateCallback(struct nl_msg *nl_msg, const void *opaque)
> >> +{
> >> + const uint32_t *mode = (const uint32_t *) opaque;
> >> + if (!nl_msg || !opaque) {
> >> + virReportError(VIR_ERR_INTERNAL_ERROR,
> >> + _("nl_msg %p or opaque %p is NULL"),
> >> + nl_msg, opaque);
> >> + return -1;
> >> + }
> >> +
> >> + if (*mode > 0) {
> >> + struct nlattr *info_data;
> >> + if (!(info_data = nla_nest_start(nl_msg, IFLA_INFO_DATA)))
> >> + goto buffer_too_small;
> >> +
> >> + if (nla_put(nl_msg, IFLA_MACVLAN_MODE, sizeof(*mode), mode) <
0)
> >> + goto buffer_too_small;
> >> +
> >> + nla_nest_end(nl_msg, info_data);
> >> + }
> >> +
> >> + return 0;
> >> +
> >> + buffer_too_small:
> >> + virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
> >> + _("allocated netlink buffer is too small"));
> >> + return -1;
> >> +}
> >
> >Having a callback just to add a nested data into nl_msg doesn't feel like
the
> >right approach, I'd expect a callback to do more specific stuff, this should
be
> >special-cased in virNetlinkNewLink.
>
> I looked through the code for ip-link in iproute2 which could be a good example
> for netlink. And I found that the differences of attributes of various net-devices
are
> confined to IFLA_INFO_DATA or IFLA_INFO_SLAVE_DATA(this is not used in libvirt).
> The code fragment in iplink.c:
>
> if (ulinep && !strcmp(ulinep, "_slave")) {
> ... ...
> lu = get_link_slave_kind(slavebuf);
> iflatype = IFLA_INFO_SLAVE_DATA;
> } else {
> => lu = get_link_kind(type); /* Get pointer to the driver for
this type */
> => iflatype = IFLA_INFO_DATA; /* NEST will be for IFLA_INFO_DATA */
> }
> if (lu && argc) {
> => struct rtattr *data = addattr_nest(&req.n, sizeof(req), iflatype);
/* NEST BEGIN */
>
> ****************************
> if (lu->parse_opt &&
> lu->parse_opt(lu, argc, argv, &req.n))
> return -1;
> ****************************
> ^The lu points to various types of net-drivers(e.g.
macvlan/bridge/vxlan/vlan ...)
> which pack all special self-attributes in their parse_opt callback
>
> => addattr_nest_end(&req.n, data); /* NEST END */
>
> So I think that it's reasonable for us to have a callback to handle it just as
iproute2
> since iproute2 could almost create all the types of net-devices.
I went through the iproute2 code and it completely makes sense for them to
utilize callbacks here, given the amount of driver-specific options each of the
device type has. While I agree that we could make use of this in libvirt one
day, I don't feel like we need to do it now, since I don't think it brings
any noticeable benefit in its currently proposed form + as I already mentioned,
we can switch to this scheme once there are more than 1 netlink types which
could make use of it.
Okay.
>
> >
> >> +
> >> +
> >> /**
> >> * virNetDevMacVLanCreate:
> >> *
> >> @@ -307,113 +338,29 @@ virNetDevMacVLanCreate(const char *ifname,
> >> uint32_t macvlan_mode,
> >> int *retry)
> >> {
> >
> >^This replacement would be the last patch in the series.
>
> Okay.
>
> >
> >> - int rc = -1;
> >> - struct nlmsgerr *err;
> >> - struct ifinfomsg ifinfo = { .ifi_family = AF_UNSPEC };
> >> int ifindex;
> >> - unsigned int recvbuflen;
> >> - struct nl_msg *nl_msg;
> >> - struct nlattr *linkinfo, *info_data;
> >> - char macstr[VIR_MAC_STRING_BUFLEN];
> >> - VIR_AUTOFREE(struct nlmsghdr *) resp = NULL;
> >> -
> >> - if (virNetDevGetIndex(srcdev, &ifindex) < 0)
> >> - return -1;
> >> -
> >> + int error = 0;
> >> *retry = 0;
> >>
> >> - nl_msg = nlmsg_alloc_simple(RTM_NEWLINK,
> >> - NLM_F_REQUEST | NLM_F_CREATE |
NLM_F_EXCL);
> >> - if (!nl_msg) {
> >> - virReportOOMError();
> >> + if (virNetDevGetIndex(srcdev, &ifindex) < 0)
> >> return -1;
> >> - }
> >> -
> >> - if (nlmsg_append(nl_msg, &ifinfo, sizeof(ifinfo), NLMSG_ALIGNTO)
< 0)
> >> - goto buffer_too_small;
> >> -
> >> - if (nla_put_u32(nl_msg, IFLA_LINK, ifindex) < 0)
> >> - goto buffer_too_small;
> >> -
> >> - if (nla_put(nl_msg, IFLA_ADDRESS, VIR_MAC_BUFLEN, macaddress) < 0)
> >> - goto buffer_too_small;
> >> -
> >> - if (ifname &&
> >> - nla_put(nl_msg, IFLA_IFNAME, strlen(ifname)+1, ifname) < 0)
> >> - goto buffer_too_small;
> >>
> >> - if (!(linkinfo = nla_nest_start(nl_msg, IFLA_LINKINFO)))
> >> - goto buffer_too_small;
> >> -
> >> - if (nla_put(nl_msg, IFLA_INFO_KIND, strlen(type), type) < 0)
> >> - goto buffer_too_small;
> >> -
> >> - if (macvlan_mode > 0) {
> >> - if (!(info_data = nla_nest_start(nl_msg, IFLA_INFO_DATA)))
> >> - goto buffer_too_small;
> >> -
> >> - if (nla_put(nl_msg, IFLA_MACVLAN_MODE, sizeof(macvlan_mode),
> >> - &macvlan_mode) < 0)
> >> - goto buffer_too_small;
> >> -
> >> - nla_nest_end(nl_msg, info_data);
> >> - }
> >> -
> >> - nla_nest_end(nl_msg, linkinfo);
> >> -
> >> - if (virNetlinkCommand(nl_msg, &resp, &recvbuflen, 0, 0,
> >> - NETLINK_ROUTE, 0) < 0) {
> >> - goto cleanup;
> >> - }
> >> -
> >> - if (recvbuflen < NLMSG_LENGTH(0) || resp == NULL)
> >> - goto malformed_resp;
> >> -
> >> - switch (resp->nlmsg_type) {
> >> - case NLMSG_ERROR:
> >> - err = (struct nlmsgerr *)NLMSG_DATA(resp);
> >> - if (resp->nlmsg_len < NLMSG_LENGTH(sizeof(*err)))
> >> - goto malformed_resp;
> >> -
> >> - switch (err->error) {
> >> -
> >> - case 0:
> >> - break;
> >> -
> >> - case -EEXIST:
> >> + if (virNetlinkNewLink(&ifindex, ifname, macaddress, type,
> >> + virNetDevMacVLanCreateCallback,
&macvlan_mode,
> >> + &error) < 0) {
> >> + char macstr[VIR_MAC_STRING_BUFLEN];
> >> + if (error == -EEXIST)
> >> *retry = 1;
> >> - goto cleanup;
> >> -
> >> - default:
> >> - virReportSystemError(-err->error,
> >> + else
> >> + virReportSystemError(-error,
> >> _("error creating %s interface %s@%s
(%s)"),
> >> type, ifname, srcdev,
> >> virMacAddrFormat(macaddress, macstr));
> >> - goto cleanup;
> >> - }
> >> - break;
> >> -
> >> - case NLMSG_DONE:
> >> - break;
> >>
> >> - default:
> >> - goto malformed_resp;
> >> + return -1;
> >> }
> >>
> >> - rc = 0;
> >> - cleanup:
> >> - nlmsg_free(nl_msg);
> >> - return rc;
> >> -
> >> - malformed_resp:
> >> - virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
> >> - _("malformed netlink response message"));
> >> - goto cleanup;
> >> -
> >> - buffer_too_small:
> >> - virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
> >> - _("allocated netlink buffer is too small"));
> >> - goto cleanup;
> >> + return 0;
> >> }
> >>
> >> /**
> >> diff --git a/src/util/virnetlink.c b/src/util/virnetlink.c
> >> index 8f06446..817e347 100644
> >> --- a/src/util/virnetlink.c
> >> +++ b/src/util/virnetlink.c
> >> @@ -489,6 +489,110 @@ virNetlinkDumpLink(const char *ifname, int ifindex,
> >>
> >>
> >> /**
> >> + * virNetlinkNewLink:
> >
> >Introduction of this function should come in a separate patch before you start
> >replacing the existing ones.
>
> Okay.
>
> >
> >> + *
> >> + * @ifindex: The index for the 'link' device
> >> + * @ifname: The name of the link
> >> + * @mac: The MAC address of the device
> >
> >Most of ^these attributes could be packed into a virNetlinkNewLinkData
> >structure (or something similarly named). You'll have to test check for
> >VIR_DOMAIN_NET_TYPE_DIRECT and VIR_NETDEV_MACVLAN_MODE_VEPA according to my
> >comments below inside <comment_block>...
>
> I have studied your comments inside <comment_block>, but I still don't know
how to
> test check for VIR_DOMAIN_NET_TYPE_DIRECT and VIR_NETDEV_MACVLAN_MODE_VEPA.
> Could you explain it for me? :-)
Actually, I retract on checking the modes ^this way, I was hasty with my
response and then went through the code more thoroughly and realized that mode
corresponds to netlink modes rather than to libvirt MACVLAN_MODE_X and we have
a corresponding modemap for this. Not that it could not be achieved with our
enums, but it's just not worth passing it around, so we'll need to imo rely on
the @type string, which in this case would either be "macvtap" or
"macvlan".
Okay.
>
> >
> >> + * @type: The type of device, i.e., "bridge",
"macvtap", "macvlan"
> >> + * @cb: The callback for filling in IFLA_INFO_DATA for this type
> >
> >I don't think callback will be necessary
>
> I think that it's reasonable to use callback here.
> Another mode might be:
> 1) pre-alloc buffer and fill in IFLA_INFO_DATA
> 2) pass the buffer pointer as a param into virNetlinkNewLink which will
> embed it into nlmsg
> 3) free the buffer
> I think that this mode might be a bit more complicated and it's hard to find good
references.
>
> >
> >> + * @opaque: opaque for the callback
> >> + * @error: for retrieving error code
> >
> >^see below for my comment on return value...
>
> Okay.
>
> >
> >> + *
> >> + * Create a network "link" (aka interface aka device) with the
given
> >> + * args. This works for many different types of network devices,
> >> + * including macvtap and bridges.
> >> + *
> >> + * Returns 0 on success, -1 on fatal error.
> >
<brain fart>
> >Since this is a nice libvirt wrapper around the netlink communication
> >machinery, I think that returning -<netlink_error> in case of an error is
> >reasonable. Then, each of the callers of this API can handle the return values
> >as they please.
<brain fart/>
>
> Okay. I will use netlink error as return-value.
> But how to handle -1?
> 1) always return -1 for *other* failures
> 2) return -ENOMEM for OutofMemory; return -ENOBUFS for buffer_too_small; ...
Good point, we couldn't differentiate between -1 from libvirt and -ENOPERM or
-NLE_FAILURE (from libnl), so the @error variable should stay.
Okay.
> I should take which one?
>
> >
> >> + */
> >> +int
> >> +virNetlinkNewLink(const int *ifindex,
> >> + const char *ifname,
> >> + const virMacAddr *mac,
> >> + const char *type,
> >> + virNetlinkNewLinkCallback cb,
> >> + const void *opaque,
> >> + int *error)
> >> +{
> >> + struct nlmsgerr *err;
> >> + struct nlattr *linkinfo;
> >> + unsigned int buflen;
> >> + struct ifinfomsg ifinfo = { .ifi_family = AF_UNSPEC };
> >> + VIR_AUTOPTR(virNetlinkMsg) nl_msg = NULL;
> >> + VIR_AUTOFREE(struct nlmsghdr *) resp = NULL;
> >> +
> >> + *error = 0;
> >> +
> >> + nl_msg = nlmsg_alloc_simple(RTM_NEWLINK,
> >> + NLM_F_REQUEST | NLM_F_CREATE |
NLM_F_EXCL);
> >> + if (!nl_msg) {
> >> + virReportOOMError();
> >> + return -1;
> >> + }
> >> +
> >> + if (nlmsg_append(nl_msg, &ifinfo, sizeof(ifinfo), NLMSG_ALIGNTO)
< 0)
> >> + goto buffer_too_small;
> >> +
> >
>
> No offence. Let me try talking about nla_put* in comment_block.
>
> ><comment_block>
> >
> >> + if (ifindex && nla_put_u32(nl_msg, IFLA_LINK, *ifindex) <
0)
> >
> >pre-existing, but is there a particular reason why ^this has to be u32??
>
> prototype of nla_put_u32:
> int nla_put_u32(struct nl_msg *msg, int attrtype, uint32_t value);
> It is used for adding 32 bit integer into msg.
> We got ifindex by calling ioctl with SIOCGIFINDEX and this ifindex >= 0.
> So I think that it's right to use nla_put_u32.
>
> >
> >> + goto buffer_too_small;
> >> +
> >> + if (mac && nla_put(nl_msg, IFLA_ADDRESS, VIR_MAC_BUFLEN, mac)
< 0)
> >> + goto buffer_too_small;
> >
> >Similarly, is there a reason why ^this is not necessary to be u32?
This was a brain fart too, of course it wouldn't work because we're not
adding an int here, so you're right.
Okay.
>
> prototype of nla_put:
> int nla_put(struct nl_msg *msg, int attrtype, int datalen, const void *data);
> It means put a sequence of octets into msg. It could be used for string and
> other packed struct.
> The type of mac is struct virMacAddr which size is VIR_MAC_BUFLEN.
> So I think it's right.
>
> >
> >> +
> >> + if (ifname && nla_put(nl_msg, IFLA_IFNAME, strlen(ifname)+1,
ifname) < 0)
> >> + goto buffer_too_small;
> >> +
> >> + if (!(linkinfo = nla_nest_start(nl_msg, IFLA_LINKINFO)))
> >
> >Since we're already touching the code, I would find it more readable if
^this
> >was written as linkinfo = nla_nest_start. The reason for that is that it's
not
> >immediately visible where the matching counterpart to "nla_nest_end"
is.
>
> Okay.
>
> >
> >> + goto buffer_too_small;
> >> +
> >> + if (nla_put(nl_msg, IFLA_INFO_KIND, strlen(type), type) < 0)
> >
> >I'm puzzled why ^this doesn't need strlen(foo)+1 instead of plain
strlen(foo)
>
> This code is same as iproute2. There's no problem here.
> In net/core/rtnetlink.c of kernel (source v4.4.0):
> nla_strlcpy(kind, linkinfo[IFLA_INFO_KIND], sizeof(kind));
> and the implementation of nla_strlcpy:
> size_t nla_strlcpy(char *dst, const struct nlattr *nla, size_t dstsize)
> {
> size_t srclen = nla_len(nla); /* whatever it is strlen(foo)+1 or
strlen(foo) */
> char *src = nla_data(nla);
> => if (srclen > 0 && src[srclen - 1] == '\0')
> => srclen--;
>
> But I think it should always be strlen(foo)+1 for consistency of code.
It's also safer, because that way you're always sure that you're getting a
nul-terminated string, especially since you'll be doing a plain memcpy in
nla_put + you'll avoid questions like mine out of sheer confusion.
Okay.
>
> >
> >> + goto buffer_too_small;
> >
> ></comment_block>
> >
> >We could actually create a wrapper macro around nla_put which would make the
> >block (begin-end) tiny more readable. See my notes above, as I have further
> >questions...Now, the macros would again be a separate patch to make the change
> >more gradual. Depending on the comments to my questions above, the macro would
> >either take the size of the type and always call nla_put or a specialized
> >nla_put_<string|u32|whatever> function. I'm CC'ing Laine to help
answering my
> >questions inside <comment_block>.
> >
> >Erik
>
> I think that the usage of nla_put* is clear now.
> So I don't know how to create wrapper macros.
> Could you give me some instructions for it?
Currently what I have on my branch is the following (more changes needed to
actually compile it):
diff --git a/src/util/virnetlink.c b/src/util/virnetlink.c
index 817e347cec..e626d53a31 100644
--- a/src/util/virnetlink.c
+++ b/src/util/virnetlink.c
@@ -515,12 +515,25 @@ virNetlinkNewLink(const int *ifindex,
int *error)
{
struct nlmsgerr *err;
- struct nlattr *linkinfo;
unsigned int buflen;
struct ifinfomsg ifinfo = { .ifi_family = AF_UNSPEC };
+ struct nlattr *linkinfo = NULL;
+ struct nlattr *info_data = NULL;
VIR_AUTOPTR(virNetlinkMsg) nl_msg = NULL;
VIR_AUTOFREE(struct nlmsghdr *) resp = NULL;
+# define NETLINK_MSG_NEST_START(container, attrtype) \
+ container = nla_nest_start(nl_msg, attrtype); \
+ if (!container) \
+ goto buffer_too_small;
+
+# define NETLINK_MSG_NEST_END(container) \
+ nla_nest_end(nl_msg, container);
+
+# define NETLINK_MSG_PUT(attrtype, datalen, data) \
+ if (data && nla_put(nl_msg, attrtype, datalen, data) < 0) \
+ goto buffer_too_small;
+
*error = 0;
nl_msg = nlmsg_alloc_simple(RTM_NEWLINK,
@@ -533,25 +546,24 @@ virNetlinkNewLink(const int *ifindex,
if (nlmsg_append(nl_msg, &ifinfo, sizeof(ifinfo), NLMSG_ALIGNTO) < 0)
goto buffer_too_small;
- if (ifindex && nla_put_u32(nl_msg, IFLA_LINK, *ifindex) < 0)
- goto buffer_too_small;
+ NETLINK_MSG_PUT(IFLA_LINK, sizeof(uint32_t), ifindex)
+ NETLINK_MSG_PUT(IFLA_ADDRESS, VIR_MAC_BUFLEN, mac)
+ NETLINK_MSG_PUT(IFLA_IFNAME, (strlen(ifname)+1), ifname)
- if (mac && nla_put(nl_msg, IFLA_ADDRESS, VIR_MAC_BUFLEN, mac) < 0)
- goto buffer_too_small;
+ NETLINK_MSG_NEST_START(linkinfo, IFLA_LINKINFO)
- if (ifname && nla_put(nl_msg, IFLA_IFNAME, strlen(ifname)+1, ifname) < 0)
- goto buffer_too_small;
+ NETLINK_MSG_PUT(IFLA_INFO_KIND, (strlen(type)+1), type)
- if (!(linkinfo = nla_nest_start(nl_msg, IFLA_LINKINFO)))
- goto buffer_too_small;
+ if ((STREQ(type, "macvtap") || STREQ(type, "macvlan"))
&&
+ macvlan_mode && *macvlan_mode > 0) {
+ NETLINK_MSG_NEST_START(info_data, IFLA_INFO_DATA)
- if (nla_put(nl_msg, IFLA_INFO_KIND, strlen(type), type) < 0)
- goto buffer_too_small;
+ NETLINK_MSG_PUT(IFLA_MACVLAN_MODE, sizeof(uint32_t), macvlan_mode)
- if (cb && cb(nl_msg, opaque) < 0)
- return -1;
+ NETLINK_MSG_NEST_END(info_data)
+ }
- nla_nest_end(nl_msg, linkinfo);
+ NETLINK_MSG_NEST_END(linkinfo)
if (virNetlinkCommand(nl_msg, &resp, &buflen, 0, 0, NETLINK_ROUTE, 0) <
0)
return -1;
@@ -578,6 +590,10 @@ virNetlinkNewLink(const int *ifindex,
goto malformed_resp;
}
+# undef NETLINK_MSG_NEST_START
+# undef NETLINK_MSG_NEST_END
+# undef NETLINK_MSG_PUT
+
return 0;
malformed_resp:
Regards,
Erik
I got it! Thanks.
Shi Lei