gpt4 book ai didi

javax.swing.text.ElementIterator 奇怪的行为

转载 作者:太空宇宙 更新时间:2023-11-04 08:39:41 24 4
gpt4 key购买 nike

我在使用 javax.swing.text.ElementIterator() 时遇到了奇怪的行为。它从不显示所有元素,并且根据我使用的 ParserCallback 类型显示不同数量的元素。下面的测试是在我的个人资料中的网站上完成的,但也可以使用任何其他大的 html 文件来完成。

// some imports shown in case it's an import mix-up
import javax.swing.text.AttributeSet;
import javax.swing.text.BadLocationException;
import javax.swing.text.ChangedCharSetException;
import javax.swing.text.Element;
import javax.swing.text.ElementIterator;
import javax.swing.text.MutableAttributeSet;
import javax.swing.text.StyleConstants;
import javax.swing.text.html.HTML;
import javax.swing.text.html.HTML.Tag;
import javax.swing.text.html.HTMLDocument;
import javax.swing.text.html.HTMLEditorKit;
import javax.swing.text.html.HTMLEditorKit.Parser;
import javax.swing.text.html.HTMLEditorKit.ParserCallback;
import javax.swing.text.html.parser.ParserDelegator;

/**
 * Dumps an element, and then each of its children recursively, to standard output.
 *
 * <p>For every element one summary line is printed, followed by one line per
 * attribute. When the attribute being listed is the name attribute and its value
 * is {@code HTML.Tag.CONTENT}, the document text covered by the element is
 * printed as well.
 *
 * @param htmlDoc the document the element belongs to; used to fetch the text
 * @param element the element to dump
 * @throws BadLocationException if the element's offsets are rejected by
 *         {@code htmlDoc.getText}
 */
public void printElement(HTMLDocument htmlDoc, Element element)
        throws BadLocationException
{
    final AttributeSet attrs = element.getAttributes();

    // Summary line for the element itself.
    StringBuilder summary = new StringBuilder();
    summary.append("element: '").append(element.toString().trim());
    summary.append("', name: '").append(element.getName());
    summary.append("', children: ").append(element.getElementCount());
    summary.append(", attributes: ").append(attrs.getAttributeCount());
    summary.append(", leaf: ").append(element.isLeaf());
    System.out.println(summary);

    // One line per attribute; the text is shown when the name attribute marks content.
    for (Enumeration<?> keys = attrs.getAttributeNames(); keys.hasMoreElements();)
    {
        Object key = keys.nextElement();
        System.out.println("\tAttribute: '" + key + "', Val: '" + attrs.getAttribute(key) + "'");

        boolean isContentName = key == StyleConstants.NameAttribute
                && attrs.getAttribute(StyleConstants.NameAttribute) == HTML.Tag.CONTENT;
        if (!isContentName)
        {
            continue;
        }
        int from = element.getStartOffset();
        int to = element.getEndOffset();
        String text = htmlDoc.getText(from, to - from).trim();
        System.out.printf("\t\tContent (%d-%d): '%s'\n", from, to, text);
    }

    // Descend into the children in document order.
    int childCount = element.getElementCount();
    for (int idx = 0; idx < childCount; idx++)
    {
        printElement(htmlDoc, element.getElement(idx));
    }
}

/**
 * Parses an HTML file into an {@code HTMLDocument} and prints every element
 * reached by an {@code ElementIterator} over that document.
 *
 * @param filename path of the HTML file to parse
 * @throws FileNotFoundException if the file cannot be opened
 * @throws IOException if reading the file fails
 * @throws BadLocationException if flushing the reader or fetching element text
 *         hits an invalid document position
 */
public void tryParse(String filename)
        throws FileNotFoundException, IOException, BadLocationException
{
    Parser parser = new ParserDelegator();
    HTMLEditorKit htmlKit = new HTMLEditorKit();
    HTMLDocument htmlDoc = (HTMLDocument) htmlKit.createDefaultDocument();

    // callback2 is the document's own reader: it builds the element tree.
    ParserCallback callback2 = htmlDoc.getReader(0);
    // callback1 overrides nothing; it is kept only as the alternative used in the
    // experiment. Presumably, parsing with it leaves htmlDoc without parsed content.
    ParserCallback callback1 =
            new HTMLEditorKit.ParserCallback()
            {
            };

    // try-with-resources closes the file even when parsing throws.
    try (BufferedReader in = new BufferedReader(new InputStreamReader(new FileInputStream(filename))))
    {
        parser.parse(in, callback2, true);
        // The reader buffers pending changes; without this final flush() they are
        // never inserted into htmlDoc and the iterator below sees an incomplete tree.
        callback2.flush();
    }

    // NOTE: printElement also recurses into each element's children, so an element
    // is printed again for every ancestor the iterator visits.
    ElementIterator iterator = new ElementIterator(htmlDoc);
    Element element;
    while ((element = iterator.next()) != null)
    {
        printElement(htmlDoc, element);
    }
}

在上面的测试中,使用 callback1 还是 callback2 会得到不同的结果。更奇怪的是,如果我在回调中实现相应的方法并让它们输出一些内容,可以看到解析器确实处理了整个网站,但 ElementIterator 仍然没有遍历到全部元素。

我也尝试过使用 htmlKit.read() 而不是 parser.parse(),但仍然不起作用。

虽然我现在通过使用解析器回调函数(此处未显示)获得了所需的结果,但我仍然想知道为什么 ElementIterator 不能按预期工作,以防我稍后需要它,所以我想知道这里是否有人有该 ElementIterator 的经验并可以回答。

更新:完整的 Java 源代码已上传至此处: http://home.snafu.de/tilman/tmp/Main.java

最佳答案

使用 here 所示的方法,我没有遇到您描述的问题。我添加了 println(),所有元素似乎都在。

附录:我不确定您的 tryParse() 是如何失败的,但您的 printElement() 似乎可以在我的 main() 中工作:

import java.io.BufferedReader;
import java.io.FileReader;
import java.util.Enumeration;
import javax.swing.text.AttributeSet;
import javax.swing.text.BadLocationException;
import javax.swing.text.Element;
import javax.swing.text.ElementIterator;
import javax.swing.text.StyleConstants;
import javax.swing.text.html.HTML;
import javax.swing.text.html.HTMLDocument;
import javax.swing.text.html.HTMLEditorKit;

/**
 * Loads {@code test.html} into an {@code HTMLDocument} and prints every element
 * visited by an {@code ElementIterator}, together with its attributes.
 *
 * @see https://stackoverflow.com/questions/2882782
 */
public class NewMain {

    /**
     * Reads {@code test.html} from the working directory and dumps its elements.
     *
     * @param args unused
     * @throws Exception if the file cannot be read or a document offset is invalid
     */
    public static void main(String[] args) throws Exception {
        HTMLEditorKit htmlKit = new HTMLEditorKit();
        HTMLDocument htmlDoc = (HTMLDocument) htmlKit.createDefaultDocument();
        // Close the reader once the kit has finished loading the document.
        try (BufferedReader reader = new BufferedReader(new FileReader("test.html"))) {
            htmlKit.read(reader, htmlDoc, 0);
        }
        ElementIterator iterator = new ElementIterator(htmlDoc);
        Element element;
        while ((element = iterator.next()) != null) {
            printElement(htmlDoc, element);
        }
    }

    /**
     * Prints one summary line for the element and one line per attribute. It does
     * not recurse into children; the caller's iterator supplies them.
     *
     * @param htmlDoc the document the element belongs to; used to fetch the text
     * @param element the element to print
     * @throws BadLocationException if the element's offsets are rejected by
     *         {@code htmlDoc.getText}
     */
    private static void printElement(HTMLDocument htmlDoc, Element element)
            throws BadLocationException {
        AttributeSet attrSet = element.getAttributes();
        System.out.println(""
                + "Element: '" + element.toString().trim()
                + "', name: '" + element.getName()
                + "', children: " + element.getElementCount()
                + ", attributes: " + attrSet.getAttributeCount()
                + ", leaf: " + element.isLeaf());
        // The element's tag does not change while its attributes are listed.
        Object tag = attrSet.getAttribute(StyleConstants.NameAttribute);
        Enumeration<?> attrNames = attrSet.getAttributeNames();
        while (attrNames.hasMoreElements()) {
            Object attr = attrNames.nextElement();
            System.out.println("    Attribute: '" + attr + "', Value: '"
                    + attrSet.getAttribute(attr) + "'");
            // Show the covered text right after the name attribute of a content element.
            if (attr == StyleConstants.NameAttribute
                    && tag == HTML.Tag.CONTENT) {
                int startOffset = element.getStartOffset();
                int endOffset = element.getEndOffset();
                int length = endOffset - startOffset;
                System.out.printf("        Content (%d-%d): '%s'\n", startOffset,
                        endOffset, htmlDoc.getText(startOffset, length).trim());
            }
        }
    }
}

关于javax.swing.text.ElementIterator 奇怪的行为,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/5613616/

24 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com