Back to home page

OSCL-LXR

 
 

    


0001 /*
0002  * Licensed to the Apache Software Foundation (ASF) under one or more
0003  * contributor license agreements.  See the NOTICE file distributed with
0004  * this work for additional information regarding copyright ownership.
0005  * The ASF licenses this file to You under the Apache License, Version 2.0
0006  * (the "License"); you may not use this file except in compliance with
0007  * the License.  You may obtain a copy of the License at
0008  *
0009  *    http://www.apache.org/licenses/LICENSE-2.0
0010  *
0011  * Unless required by applicable law or agreed to in writing, software
0012  * distributed under the License is distributed on an "AS IS" BASIS,
0013  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
0014  * See the License for the specific language governing permissions and
0015  * limitations under the License.
0016  */
0017 
0018 package org.apache.spark.sql.catalyst.expressions.xml;
0019 
0020 import java.io.IOException;
0021 import java.io.Reader;
0022 
0023 import javax.xml.namespace.QName;
0024 import javax.xml.parsers.DocumentBuilder;
0025 import javax.xml.parsers.DocumentBuilderFactory;
0026 import javax.xml.parsers.ParserConfigurationException;
0027 import javax.xml.xpath.XPath;
0028 import javax.xml.xpath.XPathConstants;
0029 import javax.xml.xpath.XPathExpression;
0030 import javax.xml.xpath.XPathExpressionException;
0031 import javax.xml.xpath.XPathFactory;
0032 
0033 import org.w3c.dom.Node;
0034 import org.w3c.dom.NodeList;
0035 import org.xml.sax.InputSource;
0036 
/**
 * Utility class for all XPath UDFs. Each UDF instance should keep an instance of this class.
 *
 * <p>An instance caches the most recently compiled XPath expression, a lazily created
 * {@link DocumentBuilder}, and a reusable reader/{@link InputSource} pair, so evaluating the
 * same path against many documents does not recompile the path or rebuild the parser.
 * The instance holds mutable state without any synchronization.
 *
 * This is based on Hive's UDFXPathUtil implementation.
 */
public class UDFXPathUtil {
  public static final String SAX_FEATURE_PREFIX = "http://xml.org/sax/features/";
  public static final String EXTERNAL_GENERAL_ENTITIES_FEATURE = "external-general-entities";
  public static final String EXTERNAL_PARAMETER_ENTITIES_FEATURE = "external-parameter-entities";
  private final DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
  // Created on first use, after the factory's features have been configured.
  private DocumentBuilder builder = null;
  private final XPath xpath = XPathFactory.newInstance().newXPath();
  private final ReusableStringReader reader = new ReusableStringReader();
  // Wraps the reusable reader; the reader is re-pointed at each new document via set().
  private final InputSource inputSource = new InputSource(reader);

  // Compiled form of oldPath; recompiled only when a different path is requested.
  private XPathExpression expression = null;
  private String oldPath = null;

  /**
   * Parses {@code xml} and evaluates the XPath expression {@code path} against it.
   *
   * @param xml the XML document text
   * @param path the XPath expression
   * @param qname the desired result type, e.g. one of the {@link XPathConstants} values
   * @return the evaluation result, or {@code null} if any argument is {@code null} or if
   *         {@code xml} or {@code path} is empty
   * @throws RuntimeException if the path cannot be compiled, the parser cannot be created,
   *         or parsing/evaluation fails (the original exception is attached as the cause)
   * @throws XPathExpressionException declared for interface compatibility; failures inside
   *         this method are wrapped in {@link RuntimeException}
   */
  public Object eval(String xml, String path, QName qname) throws XPathExpressionException {
    if (xml == null || path == null || qname == null) {
      return null;
    }

    if (xml.length() == 0 || path.length() == 0) {
      return null;
    }

    // Recompile only when the path differs from the one compiled last time.
    if (!path.equals(oldPath)) {
      try {
        expression = xpath.compile(path);
      } catch (XPathExpressionException e) {
        throw new RuntimeException("Invalid XPath '" + path + "'" + e.getMessage(), e);
      }
      oldPath = path;
    }

    if (expression == null) {
      return null;
    }

    if (builder == null) {
      try {
        initializeDocumentBuilderFactory();
        builder = dbf.newDocumentBuilder();
      } catch (ParserConfigurationException e) {
        throw new RuntimeException(
          "Error instantiating DocumentBuilder, cannot build xml parser", e);
      }
    }

    // Point the shared reader at the new document; this also reopens it if it was closed.
    reader.set(xml);
    try {
      return expression.evaluate(builder.parse(inputSource), qname);
    } catch (XPathExpressionException e) {
      throw new RuntimeException("Invalid XML document: " + e.getMessage() + "\n" + xml, e);
    } catch (Exception e) {
      throw new RuntimeException("Error loading expression '" + oldPath + "'", e);
    }
  }

  /**
   * Turns off the SAX external general and external parameter entity features on the
   * factory before any {@link DocumentBuilder} is created from it.
   *
   * @throws ParserConfigurationException if the factory rejects one of the features
   */
  private void initializeDocumentBuilderFactory() throws ParserConfigurationException {
    dbf.setFeature(SAX_FEATURE_PREFIX + EXTERNAL_GENERAL_ENTITIES_FEATURE, false);
    dbf.setFeature(SAX_FEATURE_PREFIX + EXTERNAL_PARAMETER_ENTITIES_FEATURE, false);
  }

  /** Evaluates {@code path} against {@code xml} as {@link XPathConstants#BOOLEAN}. */
  public Boolean evalBoolean(String xml, String path) throws XPathExpressionException {
    return (Boolean) eval(xml, path, XPathConstants.BOOLEAN);
  }

  /** Evaluates {@code path} against {@code xml} as {@link XPathConstants#STRING}. */
  public String evalString(String xml, String path) throws XPathExpressionException {
    return (String) eval(xml, path, XPathConstants.STRING);
  }

  /** Evaluates {@code path} against {@code xml} as {@link XPathConstants#NUMBER}. */
  public Double evalNumber(String xml, String path) throws XPathExpressionException {
    return (Double) eval(xml, path, XPathConstants.NUMBER);
  }

  /** Evaluates {@code path} against {@code xml} as {@link XPathConstants#NODE}. */
  public Node evalNode(String xml, String path) throws XPathExpressionException {
    return (Node) eval(xml, path, XPathConstants.NODE);
  }

  /** Evaluates {@code path} against {@code xml} as {@link XPathConstants#NODESET}. */
  public NodeList evalNodeList(String xml, String path) throws XPathExpressionException {
    return (NodeList) eval(xml, path, XPathConstants.NODESET);
  }

  /**
   * Reusable, non-threadsafe version of {@link java.io.StringReader}.
   *
   * <p>The reader counts as closed while no string is set; calling {@link #set(String)}
   * (re)opens it and rewinds both the read position and the mark to the start.
   */
  public static class ReusableStringReader extends Reader {

    private String str = null;
    private int length = -1;
    private int next = 0;
    private int mark = 0;

    public ReusableStringReader() {
    }

    /**
     * Replaces the underlying string and rewinds the read position and the mark.
     *
     * @param s the new content; must not be {@code null}
     */
    public void set(String s) {
      this.str = s;
      this.length = s.length();
      this.mark = 0;
      this.next = 0;
    }

    /** Check to make sure that the stream has not been closed */
    private void ensureOpen() throws IOException {
      if (str == null) {
        throw new IOException("Stream closed");
      }
    }

    @Override
    public int read() throws IOException {
      ensureOpen();
      if (next >= length) {
        return -1;
      }
      return str.charAt(next++);
    }

    @Override
    public int read(char[] cbuf, int off, int len) throws IOException {
      ensureOpen();
      // The (off + len) < 0 term catches int overflow of the sum.
      if ((off < 0) || (off > cbuf.length) || (len < 0)
        || ((off + len) > cbuf.length) || ((off + len) < 0)) {
        throw new IndexOutOfBoundsException();
      } else if (len == 0) {
        return 0;
      }
      if (next >= length) {
        return -1;
      }
      int n = Math.min(length - next, len);
      str.getChars(next, next + n, cbuf, off);
      next += n;
      return n;
    }

    /**
     * Skips characters. A negative {@code ns} moves the position backwards. The movement is
     * bounded by the beginning and the end of the string.
     *
     * @param ns the number of characters to skip; may be negative
     * @return the number of characters actually skipped (negative when moving backwards),
     *         or 0 if the position is already at or past the end of the string
     * @throws IOException if the reader is closed
     */
    @Override
    public long skip(long ns) throws IOException {
      ensureOpen();
      if (next >= length) {
        return 0;
      }
      // Bound skip by beginning and end of the source. The clamping is done in long
      // arithmetic: narrowing ns to int before clamping would turn a large negative
      // request into an unrelated (possibly positive) offset.
      long n = Math.min((long) (length - next), ns);
      n = Math.max((long) -next, n);
      // n now lies in [-next, length - next], so it fits in an int.
      next += (int) n;
      return n;
    }

    @Override
    public boolean ready() throws IOException {
      ensureOpen();
      return true;
    }

    @Override
    public boolean markSupported() {
      return true;
    }

    /**
     * Remembers the current position for a later {@link #reset()}.
     *
     * @param readAheadLimit must be non-negative; otherwise it is not used
     * @throws IllegalArgumentException if {@code readAheadLimit} is negative
     * @throws IOException if the reader is closed
     */
    @Override
    public void mark(int readAheadLimit) throws IOException {
      if (readAheadLimit < 0) {
        throw new IllegalArgumentException("Read-ahead limit < 0");
      }
      ensureOpen();
      mark = next;
    }

    /** Moves the position back to the last mark, or to the start if none was set. */
    @Override
    public void reset() throws IOException {
      ensureOpen();
      next = mark;
    }

    /** Closes the reader by dropping the string; {@link #set(String)} reopens it. */
    @Override
    public void close() {
      str = null;
    }
  }
}