[libvirt] [PATCH RFC 0/2] XML: Improve XML parse error reporting

First patch modifies the error reporting function on parse errors of XML files. A new, more informative message is presented to the user, containing filename of the offending file, error description and context of the error in libxml2 style. Second patch changes default filenames used while parsing XML strings in memory to NULL, so that the error reporting function does not print bougus filename. src/conf/domain_conf.c - change "domain.xml" to NULL src/security/virt-aa-helper.c - change "domain.xml" to NULL src/util/xml.c - modify error reporting function Peter Krempa (2): XML: Improve XML parsing error messages XML: Suppress printing "domain.xml" for parse errors on XML strings src/conf/domain_conf.c | 2 +- src/security/virt-aa-helper.c | 2 +- src/util/xml.c | 88 +++++++++++++++++++++++++++++++++++------ 3 files changed, 78 insertions(+), 14 deletions(-) -- 1.7.3.4

This patch modifies error handling function for the XML parser provided by libxml2. Originaly only a line number and error message were logged. With this new error handler function, the user is provided with a more complex description of the parsing error. Context of the error is printed in libXML2 style and filename of the file, that caused the error is printed. Example of an parse error: 13:41:36.262: 16032: error : catchXMLError:706 : /etc/libvirt/qemu/rh_bad.xml:58: Opening and ending tag mismatch: name line 2 and domain </domain> ---------^ Context of the error gives the user hints that may help to quickly locate a corrupt xml file. fixes BZs: ---------- Bug 708735 - [RFE] Show column and line on XML parsing error https://bugzilla.redhat.com/show_bug.cgi?id=708735 Bug 726771 - libvirt does not specify problem file if persistent xml is invalid https://bugzilla.redhat.com/show_bug.cgi?id=726771 --- src/util/xml.c | 88 ++++++++++++++++++++++++++++++++++++++++++++++++------- 1 files changed, 76 insertions(+), 12 deletions(-) diff --git a/src/util/xml.c b/src/util/xml.c index d301af6..b0942da 100644 --- a/src/util/xml.c +++ b/src/util/xml.c @@ -633,28 +633,92 @@ virXPathNodeSet(const char *xpath, * catchXMLError: * * Called from SAX on parsing errors in the XML. + * + * This version is heavily based on xmlParserPrintFileContextInternal from libxml2. */ static void catchXMLError(void *ctx, const char *msg ATTRIBUTE_UNUSED, ...) { - int domcode = VIR_FROM_XML; xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx; - if (ctxt) { - if (ctxt->_private) + const xmlChar *cur, *base; + unsigned int n, col; /* GCC warns if signed, because compared with sizeof() */ + int domcode = VIR_FROM_XML; + + virBuffer buf = VIR_BUFFER_INITIALIZER; + char *contextstr = NULL; + char *pointerstr = NULL; + + + /* conditions for error printing */ + if (!ctxt || + (virGetLastError() != NULL) || + ctxt->input == NULL || + ctxt->lastError.level != XML_ERR_FATAL || + ctxt->lastError.message == NULL) + return; + + if (ctxt->_private) domcode = ((struct virParserData *) ctxt->_private)->domcode; - if (virGetLastError() == NULL && - ctxt->lastError.level == XML_ERR_FATAL && - ctxt->lastError.message != NULL) { - virGenericReportError(domcode, VIR_ERR_XML_DETAIL, - _("at line %d: %s"), - ctxt->lastError.line, - ctxt->lastError.message); - } + + cur = ctxt->input->cur; + base = ctxt->input->base; + + /* skip backwards over any end-of-lines */ + while ((cur > base) && ((*(cur) == '\n') || (*(cur) == '\r'))) { + cur--; } -} + /* search backwards for beginning-of-line (to max buff size) */ + while ((cur > base) && (*(cur) != '\n') && (*(cur) != '\r')) + cur--; + if ((*(cur) == '\n') || (*(cur) == '\r')) cur++; + + /* calculate the error position in terms of the current position */ + col = ctxt->input->cur - cur; + + /* search forward for end-of-line (to max buff size) */ + /* copy selected text to our buffer */ + while ((*cur != 0) && (*(cur) != '\n') && (*(cur) != '\r')) { + virBufferAddChar(&buf, *cur++); + } + + /* create blank line with problem pointer */ + contextstr = virBufferContentAndReset(&buf); + + /* (leave buffer space for pointer + line terminator) */ + for (n = 0; (n<col) && (contextstr[n] != 0); n++) { + if (contextstr[n] == '\t') + virBufferAddChar(&buf, '\t'); + else + virBufferAddChar(&buf, '-'); + } + + virBufferAddChar(&buf, '^'); + + pointerstr = virBufferContentAndReset(&buf); + + if (ctxt->lastError.file) { + virGenericReportError(domcode, VIR_ERR_XML_DETAIL, + _("%s:%d: %s%s\n%s"), + ctxt->lastError.file, + ctxt->lastError.line, + ctxt->lastError.message, + contextstr, + pointerstr); + } else { + virGenericReportError(domcode, VIR_ERR_XML_DETAIL, + _("at line %d: %s%s\n%s"), + ctxt->lastError.line, + ctxt->lastError.message, + contextstr, + pointerstr); + } + + VIR_FREE(contextstr); + VIR_FREE(pointerstr); +} /** * virXMLParseHelper: -- 1.7.3.4

On Mon, Sep 05, 2011 at 02:33:39PM +0200, Peter Krempa wrote:
This patch modifies error handling function for the XML parser provided by libxml2.
Originaly only a line number and error message were logged. With this new error handler function, the user is provided with a more complex description of the parsing error.
Context of the error is printed in libXML2 style and filename of the file, that caused the error is printed. Example of an parse error:
13:41:36.262: 16032: error : catchXMLError:706 : /etc/libvirt/qemu/rh_bad.xml:58: Opening and ending tag mismatch: name line 2 and domain </domain> ---------^
Context of the error gives the user hints that may help to quickly locate a corrupt xml file.
fixes BZs: ---------- Bug 708735 - [RFE] Show column and line on XML parsing error https://bugzilla.redhat.com/show_bug.cgi?id=708735
Bug 726771 - libvirt does not specify problem file if persistent xml is invalid https://bugzilla.redhat.com/show_bug.cgi?id=726771 --- src/util/xml.c | 88 ++++++++++++++++++++++++++++++++++++++++++++++++------- 1 files changed, 76 insertions(+), 12 deletions(-)
diff --git a/src/util/xml.c b/src/util/xml.c index d301af6..b0942da 100644 --- a/src/util/xml.c +++ b/src/util/xml.c @@ -633,28 +633,92 @@ virXPathNodeSet(const char *xpath, * catchXMLError: * * Called from SAX on parsing errors in the XML. + * + * This version is heavily based on xmlParserPrintFileContextInternal from libxml2. */ static void catchXMLError(void *ctx, const char *msg ATTRIBUTE_UNUSED, ...) { - int domcode = VIR_FROM_XML; xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx;
- if (ctxt) { - if (ctxt->_private) + const xmlChar *cur, *base; + unsigned int n, col; /* GCC warns if signed, because compared with sizeof() */ + int domcode = VIR_FROM_XML; + + virBuffer buf = VIR_BUFFER_INITIALIZER; + char *contextstr = NULL; + char *pointerstr = NULL; + + + /* conditions for error printing */ + if (!ctxt || + (virGetLastError() != NULL) || + ctxt->input == NULL || + ctxt->lastError.level != XML_ERR_FATAL || + ctxt->lastError.message == NULL) + return; + + if (ctxt->_private) domcode = ((struct virParserData *) ctxt->_private)->domcode;
- if (virGetLastError() == NULL && - ctxt->lastError.level == XML_ERR_FATAL && - ctxt->lastError.message != NULL) { - virGenericReportError(domcode, VIR_ERR_XML_DETAIL, - _("at line %d: %s"), - ctxt->lastError.line, - ctxt->lastError.message); - } + + cur = ctxt->input->cur; + base = ctxt->input->base; + + /* skip backwards over any end-of-lines */ + while ((cur > base) && ((*(cur) == '\n') || (*(cur) == '\r'))) { + cur--; } -}
+ /* search backwards for beginning-of-line (to max buff size) */ + while ((cur > base) && (*(cur) != '\n') && (*(cur) != '\r')) + cur--; + if ((*(cur) == '\n') || (*(cur) == '\r')) cur++; + + /* calculate the error position in terms of the current position */ + col = ctxt->input->cur - cur; + + /* search forward for end-of-line (to max buff size) */ + /* copy selected text to our buffer */ + while ((*cur != 0) && (*(cur) != '\n') && (*(cur) != '\r')) { + virBufferAddChar(&buf, *cur++); + } + + /* create blank line with problem pointer */ + contextstr = virBufferContentAndReset(&buf); + + /* (leave buffer space for pointer + line terminator) */ + for (n = 0; (n<col) && (contextstr[n] != 0); n++) { + if (contextstr[n] == '\t') + virBufferAddChar(&buf, '\t'); + else + virBufferAddChar(&buf, '-'); + } + + virBufferAddChar(&buf, '^'); + + pointerstr = virBufferContentAndReset(&buf); + + if (ctxt->lastError.file) { + virGenericReportError(domcode, VIR_ERR_XML_DETAIL, + _("%s:%d: %s%s\n%s"), + ctxt->lastError.file, + ctxt->lastError.line, + ctxt->lastError.message, + contextstr, + pointerstr); + } else { + virGenericReportError(domcode, VIR_ERR_XML_DETAIL, + _("at line %d: %s%s\n%s"), + ctxt->lastError.line, + ctxt->lastError.message, + contextstr, + pointerstr); + } + + VIR_FREE(contextstr); + VIR_FREE(pointerstr); +}
ACK, looks okay, it's a bit convoluted, but the libxml2 routine got an awful lot of use over the years and works, so ... pushed, thanks ! Daniel -- Daniel Veillard | libxml Gnome XML XSLT toolkit http://xmlsoft.org/ daniel@veillard.com | Rpmfind RPM search engine http://rpmfind.net/ http://veillard.com/ | virtualization library http://libvirt.org/

Patch e81778d6184f1850a10eb661eb756b50421d5ac4 introduces printing file name on XML errors. This corrects the URL string to be NULL and therefore to print an error message not containing bogus filename "domain.xml". NULL is a valid parameter for the file name value, as the only usage is for error handlers. Functions touching the value either in libxml2 or in libvirt check this parameter for NULL and behave according to it. --- src/conf/domain_conf.c | 2 +- src/security/virt-aa-helper.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c index cce9955..06baaaf 100644 --- a/src/conf/domain_conf.c +++ b/src/conf/domain_conf.c @@ -7367,7 +7367,7 @@ virDomainDefParse(const char *xmlStr, xmlDocPtr xml; virDomainDefPtr def = NULL; - if ((xml = virXMLParse(filename, xmlStr, "domain.xml"))) { + if ((xml = virXMLParse(filename, xmlStr, NULL))) { def = virDomainDefParseNode(caps, xml, xmlDocGetRootElement(xml), expectedVirtTypes, flags); xmlFreeDoc(xml); diff --git a/src/security/virt-aa-helper.c b/src/security/virt-aa-helper.c index bb577d3..1de2f77 100644 --- a/src/security/virt-aa-helper.c +++ b/src/security/virt-aa-helper.c @@ -641,7 +641,7 @@ caps_mockup(vahControl * ctl, const char *xmlStr) xmlDocPtr xml = NULL; xmlXPathContextPtr ctxt = NULL; - if (!(xml = virXMLParseStringCtxt(xmlStr, "domain.xml", &ctxt))) { + if (!(xml = virXMLParseStringCtxt(xmlStr, NULL, &ctxt))) { goto cleanup; } -- 1.7.3.4

On 09/05/2011 06:33 AM, Peter Krempa wrote:
Patch e81778d6184f1850a10eb661eb756b50421d5ac4 introduces printing file name on XML errors. This corrects the URL string to be NULL and therefore to print an error message not containing bogus filename "domain.xml".
NULL is a valid parameter for the file name value, as the only usage is for error handlers. Functions touching the value either in libxml2 or in libvirt check this parameter for NULL and behave according to it. --- src/conf/domain_conf.c | 2 +- src/security/virt-aa-helper.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-)
I still think a lot of the virXMLParseStringCtxt() calls in virsh.c need the same treatment. -- Eric Blake eblake@redhat.com +1-801-349-2682 Libvirt virtualization library http://libvirt.org

On 09/05/2011 05:02 PM, Eric Blake wrote:
On 09/05/2011 06:33 AM, Peter Krempa wrote:
Patch e81778d6184f1850a10eb661eb756b50421d5ac4 introduces printing file name on XML errors. This corrects the URL string to be NULL and therefore to print an error message not containing bogus filename "domain.xml".
NULL is a valid parameter for the file name value, as the only usage is for error handlers. Functions touching the value either in libxml2 or in libvirt check this parameter for NULL and behave according to it. --- src/conf/domain_conf.c | 2 +- src/security/virt-aa-helper.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-)
I still think a lot of the virXMLParseStringCtxt() calls in virsh.c need the same treatment.
Uhm, yes. You're right. I forgot about that. I'll post another patch to this series.

On Mon, Sep 05, 2011 at 02:33:40PM +0200, Peter Krempa wrote:
Patch e81778d6184f1850a10eb661eb756b50421d5ac4 introduces printing file name on XML errors. This corrects the URL string to be NULL and therefore to print an error message not containing bogus filename "domain.xml".
NULL is a valid parameter for the file name value, as the only usage is for error handlers. Functions touching the value either in libxml2 or in libvirt check this parameter for NULL and behave according to it.
Well, I started that trend, and used that as a hint about what kind of XML paring might be failing. If you have a better idea on how to name that temporary "file" (it's actually a memory entity), but I'm not sure setting it as NULL is such an improvement, Daniel -- Daniel Veillard | libxml Gnome XML XSLT toolkit http://xmlsoft.org/ daniel@veillard.com | Rpmfind RPM search engine http://rpmfind.net/ http://veillard.com/ | virtualization library http://libvirt.org/

Dňa 6.9.2011 9:41, Daniel Veillard wrote / napísal(a):
On Mon, Sep 05, 2011 at 02:33:40PM +0200, Peter Krempa wrote:
Patch e81778d6184f1850a10eb661eb756b50421d5ac4 introduces printing file name on XML errors. This corrects the URL string to be NULL and therefore to print an error message not containing bogus filename "domain.xml".
NULL is a valid parameter for the file name value, as the only usage is for error handlers. Functions touching the value either in libxml2 or in libvirt check this parameter for NULL and behave according to it. Well, I started that trend, and used that as a hint about what kind of XML paring might be failing. If you have a better idea on how to name that temporary "file" (it's actually a memory entity), but I'm not sure setting it as NULL is such an improvement,
Daniel
The idea of this patch was to suppress printing domain.xml in the error message and stick with the default error message which stated only the line number and was used previously (it did not contain the filename). I'll have another look at this and try to come up with something more descriptitve, if it's possible. Peter

On Tue, Sep 06, 2011 at 09:58:35AM +0200, Peter Krempa wrote:
Dňa 6.9.2011 9:41, Daniel Veillard wrote / napísal(a):
On Mon, Sep 05, 2011 at 02:33:40PM +0200, Peter Krempa wrote:
Patch e81778d6184f1850a10eb661eb756b50421d5ac4 introduces printing file name on XML errors. This corrects the URL string to be NULL and therefore to print an error message not containing bogus filename "domain.xml".
NULL is a valid parameter for the file name value, as the only usage is for error handlers. Functions touching the value either in libxml2 or in libvirt check this parameter for NULL and behave according to it. Well, I started that trend, and used that as a hint about what kind of XML paring might be failing. If you have a better idea on how to name that temporary "file" (it's actually a memory entity), but I'm not sure setting it as NULL is such an improvement,
Daniel
The idea of this patch was to suppress printing domain.xml in the error message and stick with the default error message which stated only the line number and was used previously (it did not contain the filename).
yeah I understand the intent
I'll have another look at this and try to come up with something more descriptitve, if it's possible.
the problem is that the file has no name since it's in memory, but one could still try to give an hint about what it is. It's a bit weird and a case of usability, I might be wrong :-) Daniel -- Daniel Veillard | libxml Gnome XML XSLT toolkit http://xmlsoft.org/ daniel@veillard.com | Rpmfind RPM search engine http://rpmfind.net/ http://veillard.com/ | virtualization library http://libvirt.org/

Clean up instances of "domain.xml" in context of parsing XML documents in memory to support new XML parse error reporting function. --- tools/virsh.c | 10 +++++----- 1 files changed, 5 insertions(+), 5 deletions(-) diff --git a/tools/virsh.c b/tools/virsh.c index c7240e5..1b11f79 100644 --- a/tools/virsh.c +++ b/tools/virsh.c @@ -8616,7 +8616,7 @@ makeCloneXML(const char *origxml, const char *newname) { xmlChar *newxml = NULL; int size; - doc = virXMLParseStringCtxt(origxml, "domain.xml", &ctxt); + doc = virXMLParseStringCtxt(origxml, NULL, &ctxt); if (!doc) goto cleanup; @@ -10407,7 +10407,7 @@ cmdVNCDisplay(vshControl *ctl, const vshCmd *cmd) if (!doc) goto cleanup; - xml = virXMLParseStringCtxt(doc, "domain.xml", &ctxt); + xml = virXMLParseStringCtxt(doc, NULL, &ctxt); VIR_FREE(doc); if (!xml) goto cleanup; @@ -10475,7 +10475,7 @@ cmdTTYConsole(vshControl *ctl, const vshCmd *cmd) if (!doc) goto cleanup; - xml = virXMLParseStringCtxt(doc, "domain.xml", &ctxt); + xml = virXMLParseStringCtxt(doc, NULL, &ctxt); VIR_FREE(doc); if (!xml) goto cleanup; @@ -10862,7 +10862,7 @@ cmdDetachInterface(vshControl *ctl, const vshCmd *cmd) if (!doc) goto cleanup; - xml = virXMLParseStringCtxt(doc, "domain.xml", &ctxt); + xml = virXMLParseStringCtxt(doc, NULL, &ctxt); VIR_FREE(doc); if (!xml) { vshError(ctl, "%s", _("Failed to get interface information")); @@ -11329,7 +11329,7 @@ cmdDetachDisk(vshControl *ctl, const vshCmd *cmd) if (!doc) goto cleanup; - xml = virXMLParseStringCtxt(doc, "domain.xml", &ctxt); + xml = virXMLParseStringCtxt(doc, NULL, &ctxt); VIR_FREE(doc); if (!xml) { vshError(ctl, "%s", _("Failed to get disk information")); -- 1.7.3.4
participants (3)
-
Daniel Veillard
-
Eric Blake
-
Peter Krempa