1
0
Fork 0
mirror of https://codeberg.org/Mercury-IM/Smack synced 2025-12-08 14:11:07 +01:00

Improve parseContent() and parseContentDepth()

The idea is that xml-roundtrip should *never* be expected from a
XmlPullParser. So what we need is a method that parses the content of an
element without relying on getText() returning text if on START_TAG or
END_TAG. This is already done by PubSubs ItemProvider.

Also add PacketParserUtils.parseElement() which will return the current
element as String and use this method in PubSub's ItemProvider.
This commit is contained in:
Florian Schmaus 2014-06-18 09:06:48 +02:00
parent 26b5bc0212
commit 56222d6ee2
8 changed files with 201 additions and 117 deletions

View file

@ -44,6 +44,7 @@ import org.jivesoftware.smack.provider.ProviderManager;
import org.jivesoftware.smack.sasl.SASLMechanism.SASLFailure;
import org.xmlpull.v1.XmlPullParser;
import org.xmlpull.v1.XmlPullParserException;
import org.xmlpull.v1.XmlPullParserFactory;
/**
* Utility class that helps to parse packets. Any parsing packets method that must be shared
@ -54,6 +55,24 @@ import org.xmlpull.v1.XmlPullParserException;
public class PacketParserUtils {
private static final Logger LOGGER = Logger.getLogger(PacketParserUtils.class.getName());
/**
* Creates a new XmlPullParser suitable for parsing XMPP. This means in particular that
* FEATURE_PROCESS_NAMESPACES is enabled.
* <p>
* Note that not all XmlPullParser implementations will return a String on
* <code>getText()</code> if the parser is on START_TAG or END_TAG. So you must not rely on this
* behavior when using the parser.
* </p>
*
* @return A suitable XmlPullParser for XMPP parsing
* @throws XmlPullParserException
*/
public static XmlPullParser newXmppParser() throws XmlPullParserException {
XmlPullParser parser = XmlPullParserFactory.newInstance().newPullParser();
parser.setFeature(XmlPullParser.FEATURE_PROCESS_NAMESPACES, true);
return parser;
}
/**
* Parses a message packet.
*
@ -96,7 +115,7 @@ public class PacketParserUtils {
xmlLang = defaultLanguage;
}
String subject = parseContent(parser);
String subject = parseElementText(parser);
if (message.getSubject(xmlLang) == null) {
message.addSubject(xmlLang, subject);
@ -108,8 +127,8 @@ public class PacketParserUtils {
xmlLang = defaultLanguage;
}
String body = parseContent(parser);
String body = parseElementText(parser);
if (message.getBody(xmlLang) == null) {
message.addBody(xmlLang, body);
}
@ -140,29 +159,156 @@ public class PacketParserUtils {
}
/**
* Returns the content of a tag as string regardless of any tags included.
* Returns the textual content of an element as String.
* <p>
* The parser must be positioned on a START_TAG of an element which MUST NOT contain Mixed
* Content (as defined in XML 3.2.2), or else an XmlPullParserException will be thrown.
* </p>
* This method is used for the parts where the XMPP specification requires elements that contain
* only text or are the empty element.
*
* @param parser
* @return the textual content of the element as String
* @throws XmlPullParserException
* @throws IOException
*/
public static String parseElementText(XmlPullParser parser) throws XmlPullParserException, IOException {
assert (parser.getEventType() == XmlPullParser.START_TAG);
String res;
if (parser.isEmptyElementTag()) {
res = "";
}
else {
// Advance to the text of the Element
int event = parser.next();
if (event != XmlPullParser.TEXT) {
throw new XmlPullParserException(
"Non-empty element tag not followed by text, while Mixed Content (XML 3.2.2) is disallowed");
}
res = parser.getText();
event = parser.next();
if (event != XmlPullParser.END_TAG) {
throw new XmlPullParserException(
"Non-empty element tag contains child-elements, while Mixed Content (XML 3.2.2) is disallowed");
}
}
return res;
}
/**
* Returns the current element as string.
* <p>
* The parser must be positioned on START_TAG.
* </p>
* Note that only the outermost namespace attributes ("xmlns") will be returned, not nested ones.
*
* @param parser the XML pull parser
* @return the element as string
* @throws XmlPullParserException
* @throws IOException
*/
public static String parseElement(XmlPullParser parser) throws XmlPullParserException, IOException {
assert(parser.getEventType() == XmlPullParser.START_TAG);
return parseContentDepth(parser, parser.getDepth());
}
/**
* Returns the content of a element as string.
* <p>
* The parser must be positioned on the START_TAG of the element which content is going to get
* returned. If the current element is the empty element, then the empty string is returned. If
* it is a element which contains just text, then just the text is returned. If it contains
* nested elements (and text), then everything from the current opening tag to the corresponding
* closing tag of the same depth is returned as String.
* </p>
* Note that only the outermost namespace attributes ("xmlns") will be returned, not nested ones.
*
* @param parser the XML pull parser
* @return the content of a tag as string
* @throws XmlPullParserException if parser encounters invalid XML
* @throws IOException if an IO error occurs
*/
private static String parseContent(XmlPullParser parser)
public static String parseContent(XmlPullParser parser)
throws XmlPullParserException, IOException {
int parserDepth = parser.getDepth();
return parseContentDepth(parser, parserDepth);
assert(parser.getEventType() == XmlPullParser.START_TAG);
if (parser.isEmptyElementTag()) {
return "";
}
// Advance the parser, since we want to parse the content of the current element
parser.next();
return parseContentDepth(parser, parser.getDepth());
}
/**
* Returns the content from the current position of the parser up to the closing tag of the
* given depth. Note that only the outermost namespace attributes ("xmlns") will be returned,
* not nested ones.
* <p>
* This method is able to parse the content with MX- and KXmlParser. In order to achieve
* this some trade-off has to be make, because KXmlParser does not support xml-roundtrip (ie.
* return a String on getText() on START_TAG and END_TAG). We are therefore required to work
* around this limitation, which results in only partial support for XML namespaces ("xmlns"):
* Only the outermost namespace of elements will be included in the resulting String.
* </p>
*
* @param parser
* @param depth
* @return the content of the current depth
* @throws XmlPullParserException
* @throws IOException
*/
public static String parseContentDepth(XmlPullParser parser, int depth) throws XmlPullParserException, IOException {
StringBuilder content = new StringBuilder();
while (!(parser.next() == XmlPullParser.END_TAG && parser.getDepth() == depth)) {
String text = parser.getText();
if (text == null) {
throw new IllegalStateException("Parser should never return 'null' on getText() here");
XmlStringBuilder xml = new XmlStringBuilder();
int event = parser.getEventType();
boolean isEmptyElement = false;
// XmlPullParser reports namespaces in nested elements even if *only* the outer ones defines
// it. This 'flag' ensures that when a namespace is set for an element, it won't be set again
// in a nested element. It's an ugly workaround that has the potential to break things.
String namespaceElement = null;;
while (true) {
if (event == XmlPullParser.START_TAG) {
xml.halfOpenElement(parser.getName());
if (namespaceElement == null) {
String namespace = parser.getNamespace();
if (StringUtils.isNotEmpty(namespace)) {
xml.attribute("xmlns", namespace);
namespaceElement = parser.getName();
}
}
for (int i = 0; i < parser.getAttributeCount(); i++) {
xml.attribute(parser.getAttributeName(i), parser.getAttributeValue(i));
}
if (parser.isEmptyElementTag()) {
xml.closeEmptyElement();
isEmptyElement = true;
}
else {
xml.rightAngelBracket();
}
}
content.append(text);
else if (event == XmlPullParser.END_TAG) {
if (isEmptyElement) {
// Do nothing as the element was already closed, just reset the flag
isEmptyElement = false;
}
else {
xml.closeElement(parser.getName());
}
if (namespaceElement != null && namespaceElement.equals(parser.getName())) {
// We are on the closing tag, which defined the namespace as starting tag, reset the 'flag'
namespaceElement = null;
}
if (parser.getDepth() <= depth) {
// Abort parsing, we are done
break;
}
}
else if (event == XmlPullParser.TEXT) {
xml.append(parser.getText());
}
event = parser.next();
}
return content.toString();
return xml.toString();
}
/**