gpt4 book ai didi

java - java中的XML文档遍历器

转载 作者:太空宇宙 更新时间:2023-11-04 08:05:00 25 4
gpt4 key购买 nike

每个人都知道我们可以使用 DocumentTraversal 的 NodeIterator 遍历整个 xml 文档。我的应用程序需要一些额外的工作,因此我决定在 java Stack<> 的支持下编写自己的 XML 遍历器。

这是我的代码(我不擅长编码,因此代码和逻辑可能看起来很困惑)。

public class test
{
private static Stack<Node> gStack = new Stack<Node>();

public static void main(String[] args) throws XPathExpressionException
{
String str =
"<section>"
+ "<paragraph>This example combines regular wysiwyg editing of a document with very controlled editing of semantic rich content. The main content can be"
+ "edited like you would in a normal word processor. Though the difference is that the content remains schema valid XML because Xopus will not allow you to perform actions"
+ "on the document that would render it invalid.</paragraph>"
+ "<paragraph>The table is an example of controlled style. The style of the table is controlled by three attributes:</paragraph>"
+ "<unorderedlist>"
+ "<item><paragraph><emphasis>alternaterowcolor</emphasis>, do all rows have the same color, or should the background color alternate?</paragraph></item>"
+ "<item><paragraph><emphasis>border</emphasis>, a limited choice of border styles.</paragraph></item>"
+ "<item><paragraph><emphasis>color</emphasis>, a limited choice of colors.</paragraph></item>"
+ "</unorderedlist>"
+ "<paragraph>You have quite some freedom to style the table, but you can't break the predefined style.</paragraph>"
+ "</section>";

Document domDoc = null;
try
{
DocumentBuilderFactory docFactory = DocumentBuilderFactory.newInstance();
DocumentBuilder docBuilder = docFactory.newDocumentBuilder();
ByteArrayInputStream bis = new ByteArrayInputStream(str.getBytes());
domDoc = docBuilder.parse(bis);
}
catch (Exception e)
{
e.printStackTrace();
}

Element root = null;
NodeList list = domDoc.getChildNodes();
for (int i = 0; i < list.getLength(); i++)
{
if (list.item(i) instanceof Element)
{
root = (Element) list.item(i);
break;
}
}

NodeList nlist = root.getChildNodes();

System.out.println("root = " + root.getNodeName() + " child count = " + nlist.getLength());
domTraversor(root);
}

private static void domTraversor(Node node)
{
if (node.getNodeName().equals("#text"))
{
System.out.println("textElem = " + node.getTextContent());
if (node.getNextSibling() != null)
{
gStack.push(node.getNextSibling());
domTraversor(node.getNextSibling());
}
else
{
if (node.getParentNode().getNextSibling() != null)
domTraversor(node.getParentNode().getNextSibling());
}
}
else
{
if (node.getChildNodes().getLength() > 1)
{
gStack.push(node);
Node n = node.getFirstChild();
if (n.getNodeName().equals("#text"))
{
System.out.println("textElem = " + n.getTextContent());
if (n.getNextSibling() != null)
{
gStack.push(n.getNextSibling());
domTraversor(n.getNextSibling());
}
}
else
{
gStack.push(n);
domTraversor(n);
}
}
else if (node.getChildNodes().getLength() == 1)
{
Node fnode = node.getFirstChild();
if (fnode.getChildNodes().getLength() > 1)
{
gStack.push(fnode);
domTraversor(fnode);
}
else
{
if (!fnode.getNodeName().equals("#text"))
{
gStack.push(fnode);
domTraversor(fnode);
}
else
{
System.out.println("textElem = " + fnode.getTextContent());
if (fnode.getNodeName().equals("#text"))
{
if (node.getNextSibling() != null)
{
gStack.push(node.getNextSibling());
domTraversor(node.getNextSibling());
}
else
{
if (!gStack.empty())
{
Node sibPn = gStack.pop();
if (sibPn.getNextSibling() == null)
{
sibPn = gStack.pop();
}
domTraversor(sibPn.getNextSibling());
}
}
}
else
{
if (fnode.getNextSibling() != null)
{
domTraversor(fnode.getNextSibling());
}
else
{
if (!gStack.empty())
{
Node sibPn = gStack.pop().getNextSibling();
domTraversor(sibPn);
}
}
}
}
}
}
}
}
}

它可以很好地处理某些 xml 文档,但不能处理具有类似标签的文档。

<unorderedlist>
<item>
<paragraph>
<emphasis>alternaterowcolor</emphasis>
, do all rows have the same color, or should the background
color
alternate?
</paragraph>
</item>
<item>
<paragraph>
<emphasis>border</emphasis>
, a limited choice of border styles.
</paragraph>
</item>
<item>
<paragraph>
<emphasis>color</emphasis>
, a limited choice of colors.
</paragraph>
</item>
</unorderedlist>

这里是如果任何元素有超过三个嵌套子元素的情况,我的代码就会停止并且不再继续。

是否有更好的实现,请提出。

最佳答案

试试这个方法

Element e;
NodeList n;
Document doc=StudyParser.XMLfromString(xmlString);
String starttag=doc.getFirstChild().getNodeName();
Log.e("start",starttag );
n=doc.getElementsByTagName(starttag);
for(int i=0;i<n.getLength();i++){
e=(Element)n.item(i);
NodeList np = e.getElementsByTagName("item");
for(int j=0;j<np.getLength();j++){
e=(Element)n.item(i);
try{
String para=StudyParser.getValue(e, "paragraph");
Log.e("paravalue",para);
String emp=StudyParser.getValue(e, "emphasis");
Log.e("empval",emp);
}catch(Exception e){
e.printStackTrace();
}
}
}

StudyParser 类

    import java.io.BufferedInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.StringReader;
import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;

import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.util.EntityUtils;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;



public class StudyParser {
public StudyParser() {

}

public final static Document XMLfromString(String xml){
Document doc = null;

DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
try {

DocumentBuilder db = dbf.newDocumentBuilder();

InputSource is = new InputSource();
is.setCharacterStream(new StringReader(xml));
doc = db.parse(is);

} catch (ParserConfigurationException e) {
System.out.println("XML parse error: " + e.getMessage());
return null;
} catch (SAXException e) {
System.out.println("Wrong XML file structure: " + e.getMessage());
return null;
} catch (IOException e) {
System.out.println("I/O exeption: " + e.getMessage());
return null;
}

return doc;

}
public static String getXMLstring(String xml){
String line = null;

try {

DefaultHttpClient httpClient = new DefaultHttpClient();
HttpPost httpPost = new HttpPost(xml);

HttpResponse httpResponse = httpClient.execute(httpPost);
HttpEntity httpEntity = httpResponse.getEntity();
line = EntityUtils.toString(httpEntity);

} catch (UnsupportedEncodingException e) {
line = "<results status=\"error\"><msg>Can't connect to server</msg></results>";
} catch (MalformedURLException e) {
line = "<results status=\"error\"><msg>Can't connect to server</msg></results>";
} catch (IOException e) {
line = "<results status=\"error\"><msg>Can't connect to server</msg></results>";
}

return line;

}
public static String getXML(InputStream is)throws IOException {

BufferedInputStream bis = new BufferedInputStream(is);
ByteArrayOutputStream buf = new ByteArrayOutputStream();
int result = bis.read();
while(result != -1) {
byte b = (byte)result;
buf.write(b);
result = bis.read();
}
return buf.toString();
}
public final static String getElementValue( Node elem ) {
Node kid;
if( elem != null){
if (elem.hasChildNodes()){
for( kid = elem.getFirstChild(); kid != null; kid = kid.getNextSibling() ){
if( kid.getNodeType() == Node.TEXT_NODE ){
return kid.getNodeValue();
}

}
}
}
return "";
}
public static int numResults(Document doc){
Node results = doc.getDocumentElement();
int res = -1;

try{
res = Integer.valueOf(results.getAttributes().getNamedItem("Categories").getNodeValue());
}catch(Exception e ){
res = -1;
}

return res;
}

public static String getValue(Element item, String str) {
NodeList n = item.getElementsByTagName(str);
return StudyParser.getElementValue(n.item(0));
}


}

只是动态 xml 的普通演示,我假设了相同的 xml,但不使用 getElementByTagName 有许多属性,您可以检查 accodringlly 看到

   doc = StudyParser.XMLfromString(xml);
String starttag=doc.getFirstChild().getNodeName();
Log.e("start",starttag );
n=doc.getElementsByTagName(starttag);
for(int i=0;i<n.getLength();i++){
e=(Element)n.item(i);
try{
Log.e("1234",""+ e.getTextContent());

}catch(Exception e){
e.printStackTrace();
}

}

关于java - java中的XML文档遍历器,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/12224899/

25 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com