gpt4 book ai didi

java - 如何在java中通过Apache POI Excel输出大型csv文件?

转载 作者:行者123 更新时间:2023-11-30 05:20:16 50 4
gpt4 key购买 nike

我正在尝试通过 Apache POI(Java)将 300k 行数据写入 csv 文件,也就是从一个 300k 行的 Excel 文件生成 csv 文件。每次程序尝试写入输出 csv 文件时,我都会收到 "GC overhead limit exceeded" 的 OutOfMemoryError 错误。我甚至尝试过把写入拆分成每 100k 行一批。输出文件的大小持续增长,但我没有看到 System.out.println 语句的输出被打印出来。

import javafx.beans.binding.StringBinding;
import org.apache.poi.hssf.record.crypto.Biff8EncryptionKey;
import org.apache.poi.hssf.usermodel.HSSFDateUtil;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.apache.poi.ss.usermodel.*;
import org.apache.poi.ss.util.CellReference;
import org.apache.poi.xssf.streaming.SXSSFWorkbook;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.*;
import java.lang.management.ManagementFactory;
import java.lang.management.MemoryPoolMXBean;
import java.lang.management.MemoryType;
import java.math.BigDecimal;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.nio.charset.Charset;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.sql.Timestamp;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.*;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class ReadWrite {
private static Logger logger= LoggerFactory.getLogger(ReadWrite.class);

public static void main(String[] args) {
try {
long startReading = System.currentTimeMillis();
Path path = Paths.get("/Users/venkatesh/Documents/Citiout_files/citiout300k_2sheets.xlsx");

byte[] result = new byte[0];
try {
result = Files.readAllBytes(path);
} catch (IOException e) {
e.printStackTrace();
}
InputStream is = new ByteArrayInputStream(result);

Workbook workbook = WorkbookFactory.create(is);

long readDone = System.currentTimeMillis() - startReading;
logger.info("read time " + readDone);



Sheet sheet = workbook.getSheetAt(1);
Row firstRow = sheet.getRow(0);
int headcol = firstRow.getLastCellNum();
long startTransform = System.currentTimeMillis();
firstRow.createCell(headcol++).setCellValue("Sold Amount1");
firstRow.createCell(headcol++).setCellValue("CF_Quantity1");
firstRow.createCell(headcol++).setCellValue("CF_Quantity2");
firstRow.createCell(headcol++).setCellValue("CF_TradePrice");
firstRow.createCell(headcol++).setCellValue("CF_ForwardPrice");
firstRow.createCell(headcol++).setCellValue("CF_UnrealizedPL");
firstRow.createCell(headcol++).setCellValue("CF_Quantity1Round");
firstRow.createCell(headcol++).setCellValue("CF_Quantity2Round");
firstRow.createCell(headcol++).setCellValue("CF_FXLotKeyNoTradeDate");
firstRow.createCell(headcol++).setCellValue("CF_FXRoundedKeyNoTradeDate");
firstRow.createCell(headcol++).setCellValue("CF_SettlementDate");
for (int i = 1; i <=sheet.getLastRowNum()+1; i++) {
String jj="";
Row nRow = sheet.getRow(i-1);
for(Cell c:nRow) {
if (c.getColumnIndex()==3 && i!=1) {
Calendar cal = Calendar.getInstance();
Date date1 = new SimpleDateFormat("dd-MMM-yyyy").parse(c.getStringCellValue());
cal.setTime(date1);
jj = String.valueOf(cal.get(Calendar.MONTH)+1) + "/" + String.valueOf(cal.get(Calendar.DAY_OF_MONTH)) + "/" + String.valueOf(cal.get(Calendar.YEAR));
}
}
int count = nRow.getLastCellNum();
//System.out.println(nRow.getCell(3).getClass());
nRow.createCell(count++).setCellFormula("G" + i + "*-1");
nRow.createCell(count++).setCellFormula("E" + i + "/" + "G" + i);
nRow.createCell(count++).setCellFormula("G" + i + "/E" + i);
nRow.createCell(count++).setCellFormula("ROUND(ABS(T" + i + "/S" + i + "),6)");
nRow.createCell(count++).setCellFormula("ROUND(K" + i + ",6)");
nRow.createCell(count++).setCellFormula("ROUND(N" + i + ",2)");
nRow.createCell(count++).setCellFormula("ROUND(S" + i + ",0)");
nRow.createCell(count++).setCellFormula("ROUND(T" + i + ",0)");
nRow.createCell(count++).setCellFormula("CONCATENATE(T" + i + "," + "\"~\"" + ",S" + i + ")");
nRow.createCell(count++).setCellFormula("CONCATENATE(X" + i + "," + "\"~\"" + ",Y" + i + ")");
nRow.createCell(count++).setCellValue(jj);
c.setCellValue(DateUtil.getExcelDate(calendar.getTime()));

}
long endTransform = System.currentTimeMillis() - startTransform;
System.out.println("Transformations time " + endTransform);
final FormulaEvaluator evaluator = workbook.getCreationHelper().createFormulaEvaluator();
FileWriter writer= new FileWriter(new enter code hereFile("/Users/venkatesh/Documents/cit300k.csv"));
StringBuilder data = new StringBuilder();
Iterator<Row> rowIterator = workbook.getSheetAt(1).iterator();

try {
while (rowIterator.hasNext()) {

Row row = rowIterator.next();


Iterator<Cell> cellIterator = row.cellIterator();
while (cellIterator.hasNext()) {
Cell cell = cellIterator.next();

CellType type = cell.getCellType();
if (type == CellType.BOOLEAN) {
data.append(cell.getBooleanCellValue());
} else if (type == CellType.NUMERIC) {
data.append(cell.getNumericCellValue());

} else if (type == CellType.STRING) {
data.append(cell.getStringCellValue());
} else if (type == CellType.FORMULA) {
switch (evaluator.evaluateFormulaCell(cell)) {
case STRING:
data.append(cell.getStringCellValue());
break;
case NUMERIC:
data.append(cell.getNumericCellValue());
break;
}
} else if (type == CellType.BLANK) {
} else {
data.append(cell + "");
}
data.append(",");
}
writer.append(data.toString());
writer.append('\n');
}
} catch(Exception e){
e.printStackTrace();
}
finally{
if(writer!=null){
writer.flush();
writer.close();
}
}

for (MemoryPoolMXBean mpBean: ManagementFactory.getMemoryPoolMXBeans()) {
if (mpBean.getType() == MemoryType.HEAP) {
System.out.printf(
"Name: %s: %s\n",
mpBean.getName(), mpBean.getUsage()
);
}
}
try {
workbook.close();
is.close();
} catch (IOException e) {
e.printStackTrace();
}
}
catch (Exception e){
e.printStackTrace();
}
}
}


20-01-12 19:52:49:267 INFO main ReadWrite:64 - read time 11354
Transformations time 38659
Exception in thread "main" java.lang.OutOfMemoryError: GC overhead limit exceeded
at java.util.TreeMap$Values.iterator(TreeMap.java:1031)
at org.apache.poi.xssf.usermodel.XSSFRow.cellIterator(XSSFRow.java:117)
at org.apache.poi.xssf.usermodel.XSSFRow.iterator(XSSFRow.java:132)
at org.apache.poi.xssf.usermodel.XSSFEvaluationSheet.getCell(XSSFEvaluationSheet.java:86)
at org.apache.poi.ss.formula.WorkbookEvaluator.evaluateFormula(WorkbookEvaluator.java:402)
at org.apache.poi.ss.formula.WorkbookEvaluator.evaluateAny(WorkbookEvaluator.java:275)
at org.apache.poi.ss.formula.WorkbookEvaluator.evaluate(WorkbookEvaluator.java:216)
at org.apache.poi.xssf.usermodel.BaseXSSFFormulaEvaluator.evaluateFormulaCellValue(BaseXSSFFormulaEvaluator.java:56)
at org.apache.poi.ss.formula.BaseFormulaEvaluator.evaluateFormulaCell(BaseFormulaEvaluator.java:185)
at ReadWrite.main(ReadWrite.java:150)

最佳答案

现在我们有了可用的堆栈跟踪,很明显,问题并不是出现在写入 CSV 文件的过程中,而是发生在您评估(计算)电子表格公式的时候。我的猜测是,该公式是对工作表中的所有行求和......或类似的东西。

这是一个问题,可能没有简单的解决方案。

POI 文档(documentation)中是这样说的:

File sizes/Memory usage

  • There are some inherent limits in the Excel file formats. These are defined in class SpreadsheetVersion. As long as you have enough main-memory, you should be able to handle files up to these limits. For huge files using the default POI classes you will likely need a very large amount of memory.
    • There are ways to overcome the main-memory limitations if needed:
    • For writing very huge files, there is SXSSFWorkbook which allows to do a streaming write of data out to files (with certain limitations on what you can do as only parts of the file are held in memory).
    • For reading very huge files, take a look at the sample XLSX2CSV which shows how you can read a file in streaming fashion (again with some limitations on what information you can read out of the file, but there are ways to get at most of it if necessary).

您显然遇到了这些内存限制。基本上,POI 试图将过多的电子表格加载到内存中...当您正在评估电子表格公式时...并且您正在填充堆。

一种解决方案是增加 Java 堆大小。或者,如果您已经使用了堆的所有可用 RAM,请在具有更多 RAM 的计算机上运行转换。现在很多标准 PC 都配备 16GB RAM。也许是时候进行硬件升级了?但我猜你已经想到了这一点。

如果增加堆大小不可行,那么您将需要重写应用程序以使用 SXSSFWorkbook。此外,您可能需要放弃使用公式求值,改为直接用 Java 代码进行计算,并且计算方式要与电子表格的逐行流式处理兼容。(这取决于公式的作用。)

查看 POI 文档中的链接示例以获取想法。

关于java - 如何在java中通过Apache POI Excel输出大型csv文件?,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/59699368/

50 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com