0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018 package org.apache.spark.sql.catalyst.expressions.xml;
0019
0020 import java.io.IOException;
0021 import java.io.Reader;
0022
0023 import javax.xml.namespace.QName;
0024 import javax.xml.parsers.DocumentBuilder;
0025 import javax.xml.parsers.DocumentBuilderFactory;
0026 import javax.xml.parsers.ParserConfigurationException;
0027 import javax.xml.xpath.XPath;
0028 import javax.xml.xpath.XPathConstants;
0029 import javax.xml.xpath.XPathExpression;
0030 import javax.xml.xpath.XPathExpressionException;
0031 import javax.xml.xpath.XPathFactory;
0032
0033 import org.w3c.dom.Node;
0034 import org.w3c.dom.NodeList;
0035 import org.xml.sax.InputSource;
0036
0037
0038
0039
0040
0041
/**
 * Evaluates XPath expressions against XML documents supplied as strings.
 *
 * <p>An instance caches the most recently compiled {@link XPathExpression} (keyed by the path
 * string), lazily creates a single {@link DocumentBuilder}, and reuses one
 * {@link ReusableStringReader}/{@link InputSource} pair for every document it parses. This state
 * is mutable and is not synchronized anywhere in this class.
 */
public class UDFXPathUtil {
  public static final String SAX_FEATURE_PREFIX = "http://xml.org/sax/features/";
  public static final String EXTERNAL_GENERAL_ENTITIES_FEATURE = "external-general-entities";
  public static final String EXTERNAL_PARAMETER_ENTITIES_FEATURE = "external-parameter-entities";

  private final DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
  /** Created on first use by {@link #eval}; {@code null} until then. */
  private DocumentBuilder builder = null;
  private final XPath xpath = XPathFactory.newInstance().newXPath();
  private final ReusableStringReader reader = new ReusableStringReader();
  private final InputSource inputSource = new InputSource(reader);

  /** Compiled form of {@link #oldPath}; {@code null} until a path has been compiled. */
  private XPathExpression expression = null;
  /** The path string that {@link #expression} was compiled from. */
  private String oldPath = null;

  /**
   * Parses {@code xml} and evaluates {@code path} against the resulting document.
   *
   * @param xml the XML document text
   * @param path the XPath expression
   * @param qname the desired result type, e.g. one of the {@link XPathConstants} values
   * @return the evaluation result, or {@code null} if any argument is {@code null} or if
   *     {@code xml} or {@code path} is empty
   * @throws RuntimeException if the path cannot be compiled, the parser cannot be created, or
   *     parsing/evaluation fails (the original exception is attached as the cause)
   * @throws XPathExpressionException declared for callers; failures raised inside this method
   *     are rethrown wrapped in {@link RuntimeException}
   */
  public Object eval(String xml, String path, QName qname) throws XPathExpressionException {
    if (xml == null || path == null || qname == null) {
      return null;
    }

    if (xml.length() == 0 || path.length() == 0) {
      return null;
    }

    compileIfPathChanged(path);

    if (expression == null) {
      return null;
    }

    ensureBuilder();

    // Point the shared reader (already wrapped by inputSource) at the new document.
    reader.set(xml);
    try {
      return expression.evaluate(builder.parse(inputSource), qname);
    } catch (XPathExpressionException e) {
      throw new RuntimeException("Invalid XML document: " + e.getMessage() + "\n" + xml, e);
    } catch (Exception e) {
      throw new RuntimeException("Error loading expression '" + oldPath + "'", e);
    }
  }

  /**
   * Recompiles the cached expression when {@code path} differs from the cached path. On a
   * compile failure the previously cached expression and path are left untouched.
   */
  private void compileIfPathChanged(String path) {
    if (path.equals(oldPath)) {
      return;
    }
    try {
      expression = xpath.compile(path);
    } catch (XPathExpressionException e) {
      throw new RuntimeException("Invalid XPath '" + path + "'" + e.getMessage(), e);
    }
    oldPath = path;
  }

  /** Creates the shared {@link DocumentBuilder} on first use. */
  private void ensureBuilder() {
    if (builder != null) {
      return;
    }
    try {
      initializeDocumentBuilderFactory();
      builder = dbf.newDocumentBuilder();
    } catch (ParserConfigurationException e) {
      throw new RuntimeException(
          "Error instantiating DocumentBuilder, cannot build xml parser", e);
    }
  }

  /** Turns off the external general and external parameter entity features on the factory. */
  private void initializeDocumentBuilderFactory() throws ParserConfigurationException {
    dbf.setFeature(SAX_FEATURE_PREFIX + EXTERNAL_GENERAL_ENTITIES_FEATURE, false);
    dbf.setFeature(SAX_FEATURE_PREFIX + EXTERNAL_PARAMETER_ENTITIES_FEATURE, false);
  }

  /** Evaluates {@code path} with result type {@link XPathConstants#BOOLEAN}; see {@link #eval}. */
  public Boolean evalBoolean(String xml, String path) throws XPathExpressionException {
    return (Boolean) eval(xml, path, XPathConstants.BOOLEAN);
  }

  /** Evaluates {@code path} with result type {@link XPathConstants#STRING}; see {@link #eval}. */
  public String evalString(String xml, String path) throws XPathExpressionException {
    return (String) eval(xml, path, XPathConstants.STRING);
  }

  /** Evaluates {@code path} with result type {@link XPathConstants#NUMBER}; see {@link #eval}. */
  public Double evalNumber(String xml, String path) throws XPathExpressionException {
    return (Double) eval(xml, path, XPathConstants.NUMBER);
  }

  /** Evaluates {@code path} with result type {@link XPathConstants#NODE}; see {@link #eval}. */
  public Node evalNode(String xml, String path) throws XPathExpressionException {
    return (Node) eval(xml, path, XPathConstants.NODE);
  }

  /** Evaluates {@code path} with result type {@link XPathConstants#NODESET}; see {@link #eval}. */
  public NodeList evalNodeList(String xml, String path) throws XPathExpressionException {
    return (NodeList) eval(xml, path, XPathConstants.NODESET);
  }

  /**
   * A {@link Reader} over a {@link String} whose content can be replaced with {@link #set}.
   * Calling {@link #close()} drops the current string; a later {@link #set} makes the reader
   * usable again.
   */
  public static class ReusableStringReader extends Reader {

    /** Current content; {@code null} means the reader is closed (or was never set). */
    private String str = null;
    private int length = -1;
    /** Index of the next character to return. */
    private int next = 0;
    /** Position restored by {@link #reset()}. */
    private int mark = 0;

    public ReusableStringReader() {
    }

    /** Replaces the content with {@code s} and rewinds both the position and the mark. */
    public void set(String s) {
      this.str = s;
      this.length = s.length();
      this.mark = 0;
      this.next = 0;
    }

    /** Throws if no string is currently set. */
    private void ensureOpen() throws IOException {
      if (str == null) {
        throw new IOException("Stream closed");
      }
    }

    @Override
    public int read() throws IOException {
      ensureOpen();
      if (next >= length) {
        return -1;
      }
      return str.charAt(next++);
    }

    @Override
    public int read(char[] cbuf, int off, int len) throws IOException {
      ensureOpen();
      // (off + len) < 0 catches int overflow of the sum.
      if ((off < 0) || (off > cbuf.length) || (len < 0)
          || ((off + len) > cbuf.length) || ((off + len) < 0)) {
        throw new IndexOutOfBoundsException();
      } else if (len == 0) {
        return 0;
      }
      if (next >= length) {
        return -1;
      }
      int n = Math.min(length - next, len);
      str.getChars(next, next + n, cbuf, off);
      next += n;
      return n;
    }

    /**
     * Moves the position by up to {@code ns} characters. A negative {@code ns} moves backwards,
     * but never before the start of the string. Returns 0 without moving when the position is
     * already at or past the end.
     *
     * @return the number of characters actually skipped (negative when moving backwards)
     */
    @Override
    public long skip(long ns) throws IOException {
      ensureOpen();
      if (next >= length) {
        return 0;
      }

      // Clamp in long arithmetic first: narrowing ns to int before clamping would keep only
      // its low 32 bits, so a large negative ns could become zero or even positive.
      long n = Math.min((long) length - next, ns);
      n = Math.max(-next, n);
      // n is now within [-next, length - next], so the narrowing cast is lossless.
      next += (int) n;
      return n;
    }

    @Override
    public boolean ready() throws IOException {
      ensureOpen();
      return true;
    }

    @Override
    public boolean markSupported() {
      return true;
    }

    /** Records the current position; {@code readAheadLimit} is only checked for sign. */
    @Override
    public void mark(int readAheadLimit) throws IOException {
      if (readAheadLimit < 0) {
        throw new IllegalArgumentException("Read-ahead limit < 0");
      }
      ensureOpen();
      mark = next;
    }

    @Override
    public void reset() throws IOException {
      ensureOpen();
      next = mark;
    }

    @Override
    public void close() {
      str = null;
    }
  }
}