gpt4 book ai didi

java - 按顺序读取行,但按需

转载 作者:行者123 更新时间:2023-11-30 06:28:33 25 4
gpt4 key购买 nike

我需要逐行迭代 Excel 电子表格,但不是一次全部迭代。我想保持对电子表格开放的流,然后将该对象传递给另一个方法。该方法会时不时地请求下一行数据。这个方法已经定下来了,所以我不能改变它。

我最初尝试使用 XSSFWorkbook 执行此操作,效果非常好,直到我耗尽了实际数据的内存。

然后,我尝试使用 XSSFSheetXMLHandler 切换到 SAX 解析器,并使用类似于 Apache POI 项目提供的示例 1 XLSX2CSV 的自定义解析器。但这会导致所有行同时处理;如果我将它们存储在内存中以便稍后阅读,我也会耗尽内存。我也无法再访问用于单元格值处理的 DataFormatter

有人可以给我指出一个允许我这样做的示例/类吗?

最佳答案

我首选的 XML 流 API 是 StAX .

知道 *.xlsx 文件只是一个 ZIP 存档和 apache poi OPCPackage因为这是一个ZipPackage,我们可以考虑以下方法:

  • *.xlsx 中获取 /xl/worksheets/sheetN.xml 包部分Excel ZipPackage
  • 在其上创建一个 StAX 阅读器。
  • 现在我们可以使用此读取器读取此XML

以下示例创建一个基本应用程序,该应用程序通过单击按钮逐行执行此操作。

import java.awt.*;
import java.awt.event.*;
import javax.swing.*;

import org.apache.poi.openxml4j.opc.*;

import javax.xml.stream.*;
import javax.xml.stream.events.*;

import javax.xml.namespace.QName;

import java.util.regex.Pattern;

public class GetExcelRowByRow extends JPanel implements ActionListener {
protected JButton button;
protected JTextArea textArea;
private final static String newline = "\n";

//file path to Excel file and sheet number to work with
private final static String filepath = "file.xlsx";
private final static int scheetnr = 1;

private StaxExcelRowByRowReader reader;

public GetExcelRowByRow() {
super(new GridBagLayout());
button = new JButton("Next Row");
button.addActionListener(this);
textArea = new JTextArea(15, 50) {
@Override
public boolean getScrollableTracksViewportWidth() {
return true;
}
};
textArea.setLineWrap(true);
textArea.setEditable(false);
JScrollPane scrollPane = new JScrollPane(textArea);
GridBagConstraints c = new GridBagConstraints();
c.gridwidth = GridBagConstraints.REMAINDER;
c.fill = GridBagConstraints.HORIZONTAL;
add(button, c);
c.fill = GridBagConstraints.BOTH;
c.weightx = 1.0;
c.weighty = 1.0;
add(scrollPane, c);

try {
reader = new StaxExcelRowByRowReader(filepath, scheetnr);
} catch (Exception ex) {
ex.printStackTrace();
}

}
@Override
public void actionPerformed(ActionEvent evt) {
String row = "Row not found...";
try {
row = reader.getNextRow();
} catch (Exception ex) {
ex.printStackTrace();
}
textArea.append(row + newline);
textArea.setCaretPosition(textArea.getDocument().getLength());
}

public StaxExcelRowByRowReader getReader() {
return reader;
}

private static void createAndShowGUI() {
JFrame frame = new JFrame("Get Excel row by row");
frame.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);
GetExcelRowByRow app = new GetExcelRowByRow();
frame.add(app);

frame.addWindowListener(new WindowAdapter() {
@Override
public void windowClosing(WindowEvent windowEvent) {
try {
app.getReader().close();
} catch (Exception ex) {
ex.printStackTrace();
}
System.exit(0);
}
});

frame.pack();
frame.setVisible(true);
}

public static void main(String[] args) {
javax.swing.SwingUtilities.invokeLater(new Runnable() {
public void run() {
createAndShowGUI();
}
});
}

//class for reading a /xl/worksheets/sheetN.xml package part from a *.xlsx Excel ZipPackage
private class StaxExcelRowByRowReader {

private XMLEventReader sheetreader;
private OPCPackage opcpackage;

public StaxExcelRowByRowReader(String filepath, int sheetnr) {
try {
opcpackage = OPCPackage.open(filepath, PackageAccess.READ);
//get the sheet package part
PackagePart sheetpart = opcpackage.getPartsByName(Pattern.compile("/xl/worksheets/sheet"+sheetnr+".xml")).get(0);
//create reader for the sheet package part
sheetreader = XMLInputFactory.newInstance().createXMLEventReader(sheetpart.getInputStream());
} catch (Exception ex) {
ex.printStackTrace();
}
}

//method for getting the next row from the reader
public String getNextRow() throws Exception {
StringBuffer row = new StringBuffer();
boolean valueFound = false;
boolean nextValueIsSharedString = false;
while(sheetreader.hasNext()){
XMLEvent event = sheetreader.nextEvent();
if(event.isStartElement()) {
StartElement startElement = (StartElement)event;
QName startElementName = startElement.getName();
if(startElementName.getLocalPart().equalsIgnoreCase("row")) { //start element of row
row.append("<row");
row.append(" " + startElement.getAttributeByName(new QName("r")));
row.append(">");
} else if(startElementName.getLocalPart().equalsIgnoreCase("c")) { //start element of cell
row.append("<c");
row.append(" " + startElement.getAttributeByName(new QName("r")));
row.append(" " + startElement.getAttributeByName(new QName("s")));
row.append(" " + startElement.getAttributeByName(new QName("t")));
row.append(">");
Attribute type = startElement.getAttributeByName(new QName("t"));
if (type != null && "s".equals(type.getValue())) {
nextValueIsSharedString = true;
} else {
nextValueIsSharedString = false;
}
} else if(startElementName.getLocalPart().equalsIgnoreCase("v")) { //start element of value
row.append("<v>");
valueFound = true;
}
} else if(event.isCharacters() && valueFound) {
Characters characters = (Characters)event;
if (nextValueIsSharedString) {
row.append("shared string: " + characters.getData());
} else {
row.append(characters.getData());
}
} else if(event.isEndElement()) {
EndElement endElement = (EndElement)event;
QName endElementName = endElement.getName();
if(endElementName.getLocalPart().equalsIgnoreCase("v")) { //end element of value
row.append("</v>");
valueFound = false;
} else if(endElementName.getLocalPart().equalsIgnoreCase("c")) { //end element of cell
row.append("</c>");
} else if(endElementName.getLocalPart().equalsIgnoreCase("row")) { //end element of row
row.append("</row>");
return row.toString();
}
}
}
return "No more rows.";
}

public void close() throws Exception {
if (sheetreader != null) sheetreader.close();
if (opcpackage != null) opcpackage.close();
}
}
}

当然,这只是一个展示原理的草案。整个应用程序的代码会多得多。

接下来,我们必须读取并解析 /xl/sharedStrings.xml 包部分,其中包含共享字符串。此外,我们还必须读取并解析包含单元格样式的 /xl/styles.xml 包部分。我们需要样式来检测数值是日期还是数字,如果是数字,则检测是什么类型的数字。这是必要的,因为 Excel 将所有类型的数字存储为 double 值。日期也是 double 字,表示 01/01/1900 之后的天数,小数部分为 1h = 1/24、1m = 1/24/60、1s = 1/24/60/60。

但这可以使用与 /xl/worksheets/sheetN.xml 包部分相同的方法实现。

关于java - 按顺序读取行,但按需,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/46601782/

25 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com