gpt4 book ai didi

java - XML 解析器和 xpath 表达式

转载 作者:行者123 更新时间:2023-11-29 09:10:58 24 4
gpt4 key购买 nike

我正在使用 Java 默认的 DocumentBuilder 来解析一个不到 100 行的 XML 文档。解析该文档需要 35 毫秒,执行单个 XPath 表达式需要 15 毫秒。如何减少 XML 解析和 XPath 求值所花费的时间?

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.logging.Level;
import java.util.logging.Logger;

import javax.xml.namespace.QName;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpressionException;
import javax.xml.xpath.XPathFactory;

import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;


/**
 * Loads an XML descriptor and collects the attributes of the {@code property}
 * elements found under its {@code file} elements.
 *
 * <p>Collected attribute maps are grouped under a key of the form
 * {@code path:type}, built from the {@code path} and {@code type} attributes
 * of the owning {@code file} element.
 */
public class XMLParser {

    public static final Logger LOGGER = Logger.getLogger(XMLParser.class.getName());

    /** Attribute maps of the collected property elements, keyed by "path:type". */
    private final Map<String, List<NamedNodeMap>> fileVsProperties = new HashMap<String, List<NamedNodeMap>>();

    /** The parsed document; {@code null} when {@link XMLUtil#getDocument(File)} returned no document. */
    private final Document document;

    /**
     * Parses the given file through {@link XMLUtil#getDocument(File)}.
     *
     * @param file the XML file to load
     */
    public XMLParser(File file) {
        this.document = XMLUtil.getDocument(file);
    }

    /**
     * Records the attributes of every {@code property} descendant of the given
     * {@code file} element. Elements without a {@code path} attribute are
     * reported through the logger and ignored.
     *
     * @param file a {@code file} element
     */
    public void setProperties(Element file) {
        String path = file.getAttribute("path");
        if ("".equals(path)) {
            LOGGER.log(Level.INFO, "Attribute path is required for a file.");
            return;
        }
        String type = file.getAttribute("type");

        NodeList properties = file.getElementsByTagName("property");
        List<NamedNodeMap> props = new ArrayList<NamedNodeMap>(properties.getLength());
        for (int i = 0; i < properties.getLength(); i++) {
            Element property = (Element) properties.item(i);
            props.add(property.getAttributes());
        }
        setProperties(props, path + ":" + type);
    }

    /**
     * Appends the given attribute maps to the entry stored under {@code path},
     * creating the entry when it does not exist yet.
     */
    private void setProperties(List<NamedNodeMap> properties, String path) {
        List<NamedNodeMap> previousValue = fileVsProperties.get(path);
        if (previousValue != null) {
            previousValue.addAll(properties);
        } else {
            fileVsProperties.put(path, properties);
        }
    }

    /**
     * Looks up the {@code configuration} element whose {@code name} attribute
     * equals {@code branchName}.
     *
     * @param branchName the configuration name to look for
     * @return the matching element, or {@code null} when there is no match or
     *         no document was loaded
     * @throws XPathExpressionException if the XPath expression cannot be evaluated
     */
    public Element getConfiguration(String branchName) throws XPathExpressionException {
        if (document == null) {
            // Parsing failed earlier; report "not found" instead of failing with a NullPointerException.
            LOGGER.log(Level.WARNING, "No document loaded; cannot look up configuration {0}", branchName);
            return null;
        }
        // The name is quoted as an XPath literal so that quote characters in it
        // cannot terminate the literal and alter the expression.
        String expression = "/configurations/configuration[@name=" + toXPathLiteral(branchName) + "]";
        return (Element) XMLUtil.getElements(expression, document.getDocumentElement(), XPathConstants.NODE);
    }

    /**
     * Renders a string as an XPath 1.0 string literal. XPath 1.0 has no escape
     * sequences, so a value containing both quote characters is expressed as a
     * {@code concat(...)} of literals.
     */
    private static String toXPathLiteral(String value) {
        String text = String.valueOf(value); // null becomes "null", as string concatenation would produce
        if (text.indexOf('\'') < 0) {
            return "'" + text + "'";
        }
        if (text.indexOf('"') < 0) {
            return "\"" + text + "\"";
        }
        StringBuilder literal = new StringBuilder("concat(");
        String[] parts = text.split("'", -1);
        for (int i = 0; i < parts.length; i++) {
            if (i > 0) {
                literal.append(",\"'\",");
            }
            literal.append('\'').append(parts[i]).append('\'');
        }
        return literal.append(')').toString();
    }

    /**
     * Demo entry point: loads {@code install.xml}, looks up one configuration
     * and prints the elapsed wall-clock time of each step. The first
     * measurements also include one-time class loading, because timing starts
     * as soon as {@code main} is entered.
     */
    public static void main(String[] args) throws XPathExpressionException {
        long start = System.currentTimeMillis();
        File doc = new File("install.xml");
        XMLParser parser = new XMLParser(doc);
        long end = System.currentTimeMillis();
        System.out.println("Time Taken For Parsing :: " + (end - start) + " milliseconds");
        start = end;
        Element configuration = parser.getConfiguration("BHARATHIKANNAN");
        end = System.currentTimeMillis();
        System.out.println("Time Taken For XPATH Expression TO Finding the Configuration :: " + (end - start) + " milliseconds");
        if (configuration == null) {
            // Nothing to collect; avoid a NullPointerException in getFiles.
            LOGGER.log(Level.WARNING, "Configuration not found; no properties collected.");
            return;
        }
        start = end;
        NodeList files = parser.getFiles(configuration);
        for (int i = 0; i < files.getLength(); i++) {
            parser.setProperties((Element) files.item(i));
        }
        end = System.currentTimeMillis();
        System.out.println(parser.fileVsProperties);
        System.out.println("Time Taken For Setting Properties :: " + (end - start) + " milliseconds");
    }

    /**
     * Returns every {@code file} element nested inside the given configuration.
     *
     * @param configuration a {@code configuration} element
     * @return the {@code file} descendants of that element
     */
    public NodeList getFiles(Element configuration) {
        return configuration.getElementsByTagName("file");
    }

}


/**
 * Static helpers for parsing XML files into DOM documents and evaluating
 * XPath expressions against them.
 *
 * <p>A single {@link DocumentBuilder} and a single {@link XPath} instance are
 * shared by all callers. JAXP does not guarantee that these objects are
 * thread-safe, so this class is intended for single-threaded use.
 */
class XMLUtil {
    private static final DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
    private static DocumentBuilder builder;
    public static final Logger LOGGER = Logger.getLogger(XMLUtil.class.getName());

    private static final XPathFactory xpathFactory = XPathFactory.newInstance();

    // Created independently of the builder so that a parser configuration
    // failure does not also leave XPath evaluation unusable.
    private static final XPath xpath = xpathFactory.newXPath();

    static {
        try {
            builder = factory.newDocumentBuilder();
        } catch (ParserConfigurationException e) {
            // Keep the class loadable, but record why parsing will not work.
            LOGGER.log(Level.SEVERE, "Unable to create a DocumentBuilder; XML documents cannot be parsed", e);
        }
    }

    private XMLUtil() {
        // static utility class
    }

    /**
     * Parses the given file into a DOM document.
     *
     * @param f the XML file to parse
     * @return the parsed document, or {@code null} when the file cannot be
     *         read, is not well-formed, or no parser is available
     */
    public static Document getDocument(File f) {
        if (builder == null) {
            LOGGER.log(Level.SEVERE, "No DocumentBuilder available; cannot parse {0}", f);
            return null;
        }
        Document doc = null;
        try {
            doc = builder.parse(f);
        } catch (SAXException e) {
            LOGGER.log(Level.WARNING, "Invalid XML Document ", e);
        } catch (IOException e) {
            LOGGER.log(Level.SEVERE, "No Document Found in the given path", e);
        }
        return doc;
    }

    /**
     * Evaluates an XPath expression with the given element as context item.
     *
     * @param xpathExpression the XPath expression to evaluate
     * @param ele the context element
     * @param dataType the desired result type, one of the {@code XPathConstants} values
     * @return the evaluation result converted to {@code dataType}
     * @throws XPathExpressionException if the expression cannot be evaluated
     */
    public static Object getElements(String xpathExpression, Element ele, QName dataType) throws XPathExpressionException {
        return xpath.evaluate(xpathExpression, ele, dataType);
    }

}

XML 文件

    <?xml version="1.0"?>
<!--
Note: the default configuration is loaded using your current branch name.
A configuration can extend another one through the "extends" attribute of its
configuration node.
-->
<configurations>
<configuration name="default">
<files>
<file type="xml" path="conf/server.xml.orig">
<property regex="(port=).*" replace="\18080" xpath="/Server/Connector"></property>
<property regex="(port=).*" replace="\18080"></property>
</file>
<file type="text" path="conf/system_properties.conf">
<property regex="(username=).*" replace="\1root" ></property>
</file>
</files>
</configuration>
<configuration name="BHARATHIKANNAN" extends="default">
<files>
<file type="text" path="conf/system_properties.conf">
<property regex="(username=).*" replace="\1root" ></property>
</file>
</files>
</configuration>
</configurations>

输出:

Time Taken For Parsing :: 24 milliseconds
Time Taken For XPATH Expression TO Finding the Configuration :: 14 milliseconds
{conf/system_properties.conf:text=[com.sun.org.apache.xerces.internal.dom.AttributeMap@75d9fd51]}
Time Taken For Setting Properties :: 0 milliseconds

最佳答案

最近有人问了一个非常相似的问题,只是文档更大 (2MB),我在下面的回答中给出了一些 Saxon 的计时数据:

https://stackoverflow.com/questions/12497928/xpath-speed-comparision/12508614#12508614

这些计时比您看到的要快得多,而且是在一个大得多的文档上测得的。由于您已经在使用 Java,因此切换到 Saxon 应该非常简单。

但需要注意的是,您在进入 main() 时就立即开始计时,这意味着您测量的主要是类加载时间,而不是 XML 处理时间。我的测量会在开始计时之前先预热 Java 虚拟机。

请注意,如果您使用的是 Saxon,最好使用 Saxon 的原生树模型,它远优于 DOM 或其他替代模型。我们最近在这里发布了一些测量结果:

http://dev.saxonica.com/blog/mike/2012/09/index.html#000194

DOM 的结果平均比 Saxon 的原生树慢 8 倍,在最坏的情况下慢 23 倍。

关于java - XML 解析器和 xpath 表达式,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/12527491/

24 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com