1
0
Fork 0
mirror of https://codeberg.org/Mercury-IM/Smack synced 2025-09-09 18:29:45 +02:00

Improve parseContent() and parseContentDepth()

The idea is that xml-roundtrip should *never* be expected from a
XmlPullParser. So what we need is a method that parses the content of an
element without relying on getText() returning text if on START_TAG or
END_TAG. This is already done by PubSubs ItemProvider.

Also add PacketParserUtils.parseElement() which will return the current
element as String and use this method in PubSub's ItemProvider.
This commit is contained in:
Florian Schmaus 2014-06-18 09:06:48 +02:00
parent 26b5bc0212
commit 56222d6ee2
8 changed files with 201 additions and 117 deletions

View file

@ -44,6 +44,7 @@ import org.jivesoftware.smack.provider.ProviderManager;
import org.jivesoftware.smack.sasl.SASLMechanism.SASLFailure;
import org.xmlpull.v1.XmlPullParser;
import org.xmlpull.v1.XmlPullParserException;
import org.xmlpull.v1.XmlPullParserFactory;
/**
* Utility class that helps to parse packets. Any parsing packets method that must be shared
@ -54,6 +55,24 @@ import org.xmlpull.v1.XmlPullParserException;
public class PacketParserUtils {
private static final Logger LOGGER = Logger.getLogger(PacketParserUtils.class.getName());
/**
* Creates a new XmlPullParser suitable for parsing XMPP. This means in particular that
* FEATURE_PROCESS_NAMESPACES is enabled.
* <p>
* Note that not all XmlPullParser implementations will return a String on
* <code>getText()</code> if the parser is on START_TAG or END_TAG. So you must not rely on this
* behavior when using the parser.
* </p>
*
* @return A suitable XmlPullParser for XMPP parsing
* @throws XmlPullParserException
*/
public static XmlPullParser newXmppParser() throws XmlPullParserException {
XmlPullParser parser = XmlPullParserFactory.newInstance().newPullParser();
parser.setFeature(XmlPullParser.FEATURE_PROCESS_NAMESPACES, true);
return parser;
}
/**
* Parses a message packet.
*
@ -96,7 +115,7 @@ public class PacketParserUtils {
xmlLang = defaultLanguage;
}
String subject = parseContent(parser);
String subject = parseElementText(parser);
if (message.getSubject(xmlLang) == null) {
message.addSubject(xmlLang, subject);
@ -108,8 +127,8 @@ public class PacketParserUtils {
xmlLang = defaultLanguage;
}
String body = parseContent(parser);
String body = parseElementText(parser);
if (message.getBody(xmlLang) == null) {
message.addBody(xmlLang, body);
}
@ -140,29 +159,156 @@ public class PacketParserUtils {
}
/**
* Returns the content of a tag as string regardless of any tags included.
* Returns the textual content of an element as String.
* <p>
* The parser must be positioned on a START_TAG of an element which MUST NOT contain Mixed
* Content (as defined in XML 3.2.2), or else an XmlPullParserException will be thrown.
* </p>
* This method is used for the parts where the XMPP specification requires elements that contain
* only text or are the empty element.
*
* @param parser
* @return the textual content of the element as String
* @throws XmlPullParserException
* @throws IOException
*/
public static String parseElementText(XmlPullParser parser) throws XmlPullParserException, IOException {
assert (parser.getEventType() == XmlPullParser.START_TAG);
String res;
if (parser.isEmptyElementTag()) {
res = "";
}
else {
// Advance to the text of the Element
int event = parser.next();
if (event != XmlPullParser.TEXT) {
throw new XmlPullParserException(
"Non-empty element tag not followed by text, while Mixed Content (XML 3.2.2) is disallowed");
}
res = parser.getText();
event = parser.next();
if (event != XmlPullParser.END_TAG) {
throw new XmlPullParserException(
"Non-empty element tag contains child-elements, while Mixed Content (XML 3.2.2) is disallowed");
}
}
return res;
}
/**
* Returns the current element as string.
* <p>
* The parser must be positioned on START_TAG.
* </p>
* Note that only the outermost namespace attributes ("xmlns") will be returned, not nested ones.
*
* @param parser the XML pull parser
* @return the element as string
* @throws XmlPullParserException
* @throws IOException
*/
public static String parseElement(XmlPullParser parser) throws XmlPullParserException, IOException {
assert(parser.getEventType() == XmlPullParser.START_TAG);
return parseContentDepth(parser, parser.getDepth());
}
/**
* Returns the content of a element as string.
* <p>
* The parser must be positioned on the START_TAG of the element which content is going to get
* returned. If the current element is the empty element, then the empty string is returned. If
* it is a element which contains just text, then just the text is returned. If it contains
* nested elements (and text), then everything from the current opening tag to the corresponding
* closing tag of the same depth is returned as String.
* </p>
* Note that only the outermost namespace attributes ("xmlns") will be returned, not nested ones.
*
* @param parser the XML pull parser
* @return the content of a tag as string
* @throws XmlPullParserException if parser encounters invalid XML
* @throws IOException if an IO error occurs
*/
private static String parseContent(XmlPullParser parser)
public static String parseContent(XmlPullParser parser)
throws XmlPullParserException, IOException {
int parserDepth = parser.getDepth();
return parseContentDepth(parser, parserDepth);
assert(parser.getEventType() == XmlPullParser.START_TAG);
if (parser.isEmptyElementTag()) {
return "";
}
// Advance the parser, since we want to parse the content of the current element
parser.next();
return parseContentDepth(parser, parser.getDepth());
}
/**
* Returns the content from the current position of the parser up to the closing tag of the
* given depth. Note that only the outermost namespace attributes ("xmlns") will be returned,
* not nested ones.
* <p>
* This method is able to parse the content with MX- and KXmlParser. In order to achieve
* this some trade-off has to be make, because KXmlParser does not support xml-roundtrip (ie.
* return a String on getText() on START_TAG and END_TAG). We are therefore required to work
* around this limitation, which results in only partial support for XML namespaces ("xmlns"):
* Only the outermost namespace of elements will be included in the resulting String.
* </p>
*
* @param parser
* @param depth
* @return the content of the current depth
* @throws XmlPullParserException
* @throws IOException
*/
public static String parseContentDepth(XmlPullParser parser, int depth) throws XmlPullParserException, IOException {
StringBuilder content = new StringBuilder();
while (!(parser.next() == XmlPullParser.END_TAG && parser.getDepth() == depth)) {
String text = parser.getText();
if (text == null) {
throw new IllegalStateException("Parser should never return 'null' on getText() here");
XmlStringBuilder xml = new XmlStringBuilder();
int event = parser.getEventType();
boolean isEmptyElement = false;
// XmlPullParser reports namespaces in nested elements even if *only* the outer ones defines
// it. This 'flag' ensures that when a namespace is set for an element, it won't be set again
// in a nested element. It's an ugly workaround that has the potential to break things.
String namespaceElement = null;;
while (true) {
if (event == XmlPullParser.START_TAG) {
xml.halfOpenElement(parser.getName());
if (namespaceElement == null) {
String namespace = parser.getNamespace();
if (StringUtils.isNotEmpty(namespace)) {
xml.attribute("xmlns", namespace);
namespaceElement = parser.getName();
}
}
for (int i = 0; i < parser.getAttributeCount(); i++) {
xml.attribute(parser.getAttributeName(i), parser.getAttributeValue(i));
}
if (parser.isEmptyElementTag()) {
xml.closeEmptyElement();
isEmptyElement = true;
}
else {
xml.rightAngelBracket();
}
}
content.append(text);
else if (event == XmlPullParser.END_TAG) {
if (isEmptyElement) {
// Do nothing as the element was already closed, just reset the flag
isEmptyElement = false;
}
else {
xml.closeElement(parser.getName());
}
if (namespaceElement != null && namespaceElement.equals(parser.getName())) {
// We are on the closing tag, which defined the namespace as starting tag, reset the 'flag'
namespaceElement = null;
}
if (parser.getDepth() <= depth) {
// Abort parsing, we are done
break;
}
}
else if (event == XmlPullParser.TEXT) {
xml.append(parser.getText());
}
event = parser.next();
}
return content.toString();
return xml.toString();
}
/**

View file

@ -19,7 +19,6 @@ package org.jivesoftware.smack;
import static org.junit.Assert.*;
import java.io.StringReader;
import java.util.Collection;
import java.util.Collections;
import java.util.concurrent.CopyOnWriteArrayList;
@ -31,11 +30,11 @@ import org.jivesoftware.smack.packet.RosterPacket;
import org.jivesoftware.smack.packet.IQ.Type;
import org.jivesoftware.smack.packet.RosterPacket.Item;
import org.jivesoftware.smack.packet.RosterPacket.ItemType;
import org.jivesoftware.smack.test.util.TestUtils;
import org.jivesoftware.smack.util.PacketParserUtils;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import org.xmlpull.v1.XmlPullParserFactory;
import org.xmlpull.v1.XmlPullParser;
/**
@ -315,8 +314,6 @@ public class RosterTest {
final String contactJID = "nurse@example.com";
final Roster roster = connection.getRoster();
assertNotNull("Can't get the roster from the provided connection!", roster);
final XmlPullParser parser = XmlPullParserFactory.newInstance().newPullParser();
parser.setFeature(XmlPullParser.FEATURE_PROCESS_NAMESPACES, true);
final StringBuilder sb = new StringBuilder();
sb.append("<iq id=\"rostertest1\" type=\"set\" ")
.append("to=\"").append(connection.getUser()).append("\">")
@ -324,8 +321,7 @@ public class RosterTest {
.append("<item jid=\"").append(contactJID).append("\"/>")
.append("</query>")
.append("</iq>");
parser.setInput(new StringReader(sb.toString()));
parser.next();
final XmlPullParser parser = TestUtils.getIQParser(sb.toString());
final IQ rosterPush = PacketParserUtils.parseIQ(parser, connection);
initRoster(connection, roster);
rosterListener.reset();
@ -449,8 +445,6 @@ public class RosterTest {
final String contactJID = "nurse@example.com";
final Roster roster = connection.getRoster();
assertNotNull("Can't get the roster from the provided connection!", roster);
final XmlPullParser parser = XmlPullParserFactory.newInstance().newPullParser();
parser.setFeature(XmlPullParser.FEATURE_PROCESS_NAMESPACES, true);
final StringBuilder sb = new StringBuilder();
sb.append("<iq id=\"rostertest2\" type=\"set\" ")
.append("to=\"").append(connection.getUser()).append("\">")
@ -460,8 +454,7 @@ public class RosterTest {
.append("</item>")
.append("</query>")
.append("</iq>");
parser.setInput(new StringReader(sb.toString()));
parser.next();
final XmlPullParser parser = TestUtils.getIQParser(sb.toString());
final IQ rosterPush = PacketParserUtils.parseIQ(parser, connection);
initRoster(connection, roster);
rosterListener.reset();

View file

@ -35,7 +35,7 @@ public class ParsingExceptionTest {
private final static String EXTENSION2 =
"<extension2 xmlns='namespace'>" +
"<bar node='testNode'>" +
"<i id='testid1' >" +
"<i id='testid1'>" +
"</i>" +
"</bar>" +
"</extension2>";
@ -52,12 +52,14 @@ public class ParsingExceptionTest {
@Test
public void consumeUnparsedInput() throws Exception {
final String MESSAGE_EXCEPTION_ELEMENT =
"<" + ThrowException.ELEMENT + " xmlns='" + ThrowException.NAMESPACE + "'>" +
"<nothingInHere>" +
"</nothingInHere>" +
"</" + ThrowException.ELEMENT + ">";
XmlPullParser parser = TestUtils.getMessageParser(
"<message from='user@server.example' to='francisco@denmark.lit' id='foo'>" +
"<" + ThrowException.ELEMENT + " xmlns='" + ThrowException.NAMESPACE + "'>" +
"<nothingInHere>" +
"</nothingInHere>" +
"</" + ThrowException.ELEMENT + ">" +
MESSAGE_EXCEPTION_ELEMENT +
EXTENSION2 +
"</message>");
int parserDepth = parser.getDepth();
@ -68,8 +70,7 @@ public class ParsingExceptionTest {
content = PacketParserUtils.parseContentDepth(parser, parserDepth);
}
assertNotNull(content);
assertEquals(content, "<nothingInHere></nothingInHere>" + "</" + ThrowException.ELEMENT + ">" + EXTENSION2);
assertEquals(MESSAGE_EXCEPTION_ELEMENT + EXTENSION2 + "</message>", content);
}
static class ThrowException implements PacketExtensionProvider {

View file

@ -17,9 +17,10 @@
package org.jivesoftware.smack.test.util;
import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import org.xmlpull.v1.XmlPullParserFactory;
import org.jivesoftware.smack.util.PacketParserUtils;
import org.xmlpull.v1.XmlPullParser;
import org.xmlpull.v1.XmlPullParserException;
@ -39,12 +40,18 @@ final public class TestUtils {
return getParser(stanza, "presence");
}
public static XmlPullParser getParser(String stanza, String startTag) {
public static XmlPullParser getParser(String string, String startTag) {
return getParser(new StringReader(string), startTag);
}
public static XmlPullParser getParser(Reader reader, String startTag) {
XmlPullParser parser;
try {
parser = XmlPullParserFactory.newInstance().newPullParser();
parser.setFeature(XmlPullParser.FEATURE_PROCESS_NAMESPACES, true);
parser.setInput(new StringReader(stanza));
parser = PacketParserUtils.newXmppParser();
parser.setInput(reader);
if (startTag == null) {
return parser;
}
boolean found = false;
while (!found) {
@ -53,8 +60,7 @@ final public class TestUtils {
}
if (!found)
throw new IllegalArgumentException("Cannot parse start tag [" + startTag + "] from stanza [" + stanza
+ "]");
throw new IllegalArgumentException("Can not find start tag '" + startTag + "'");
} catch (XmlPullParserException e) {
throw new RuntimeException(e);
} catch (IOException e) {

View file

@ -27,7 +27,6 @@ import static org.junit.Assert.fail;
import java.util.Locale;
import java.util.Properties;
import org.custommonkey.xmlunit.DetailedDiff;
import org.custommonkey.xmlunit.Diff;
import org.custommonkey.xmlunit.examples.RecursiveElementNameAndTextQualifier;
import org.jivesoftware.smack.packet.Message;
@ -654,7 +653,13 @@ public class PacketParserUtilsTest {
}
@Test
/**
* RFC6121 5.2.3 explicitly disallows mixed content in <body/> elements. Make sure that we throw
* an exception if we encounter such an element.
*
* @throws Exception
*/
@Test(expected=XmlPullParserException.class)
public void invalidMessageBodyContainingTagTest() throws Exception {
String control = XMLBuilder.create("message")
.a("from", "romeo@montague.lit/orchard")
@ -668,23 +673,10 @@ public class PacketParserUtilsTest {
.a("style", "font-weight: bold;")
.t("Bad Message Body")
.asString(outputProperties);
try {
Message message = (Message) PacketParserUtils.parseMessage(TestUtils.getMessageParser(control));
String body = "<span style=\"font-weight: bold;\">"
+ "Bad Message Body</span>";
assertEquals(body, message.getBody());
assertXMLNotEqual(control, message.toXML().toString());
DetailedDiff diffs = new DetailedDiff(new Diff(control, message.toXML().toString()));
// body has no namespace URI, span is escaped
assertEquals(6, diffs.getAllDifferences().size());
} catch(XmlPullParserException e) {
fail("No parser exception should be thrown" + e.getMessage());
}
Message message = (Message) PacketParserUtils.parseMessage(TestUtils.getMessageParser(control));
fail("Should throw exception. Instead got message: " + message.toXML().toString());
}
@Test
@ -794,7 +786,7 @@ public class PacketParserUtilsTest {
public void parseContentDepthTest() throws Exception {
final String stanza = "<iq type='result' to='foo@bar.com' from='baz.com' id='42'/>";
XmlPullParser parser = TestUtils.getParser(stanza, "iq");
String content = PacketParserUtils.parseContentDepth(parser, 1);
String content = PacketParserUtils.parseContent(parser);
assertEquals("", content);
}