gpt4 book ai didi

java.lang.OutOfMemoryError: Java heap space while converting xlsx to csv format. 我尝试了 -Xms400m -Xmx4096m,但没有效果

转载 作者:行者123 更新时间:2023-12-01 18:41:11 26 4
gpt4 key购买 nike

import java.io.FileInputStream;
import java.io.IOException;
import java.net.*;
import java.nio.charset.StandardCharsets;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.List;

import com.google.common.base.Joiner;

import org.apache.poi.hssf.usermodel.HSSFCell;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.xssf.usermodel.XSSFSheet;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;

import org.apache.hadoop.fs.*;
import org.apache.hadoop.conf.*;

import static org.apache.poi.ss.usermodel.DateUtil.isCellDateFormatted;

/**
 * Converts one sheet of an .xlsx workbook into separator-delimited text.
 *
 * <p>The sheet is first converted into an in-memory list of rows ({@link #getArrayCSV()}),
 * then written to a file on the Hadoop file system identified by {@code defaultFS}.
 * Rows whose cells are all empty are skipped on output.
 */
public class XlsxToCsv {

    private final SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd");
    private final XSSFSheet excelSheet;
    private final String outputFile;
    private final String defaultFS;
    private final List<String[]> arrayCSV = new ArrayList<>();

    // Minimum number of columns emitted per row. The name is historical: the value pads
    // columns, it does not count rows.
    private int minRows = 5;

    // Escape string for hive; it is inserted in front of every separator found inside a string cell
    private String escapeStr = "\\";

    // Field separator for csv
    private String separatorStr = ",";

    /**
     * Opens the workbook and selects the sheet to convert.
     *
     * @param inputFile  path of the .xlsx file to read
     * @param outputFile path of the file to create on the target file system
     * @param excelSheet name of the sheet to convert
     * @param defaultFS  URI of the Hadoop file system to write to
     * @throws IOException              if the workbook cannot be read
     * @throws IllegalArgumentException if the workbook has no sheet named {@code excelSheet}
     */
    public XlsxToCsv(String inputFile, String outputFile, String excelSheet, String defaultFS) throws IOException {
        this.outputFile = outputFile;
        this.defaultFS = defaultFS;

        // Open by path instead of wrapping a FileInputStream: per the Apache POI documentation a
        // stream-backed package has to buffer the whole archive in memory, a file-backed one does not.
        // The workbook is left open on purpose because convertToCsv() still reads from the sheet.
        // NOTE(review): confirm how Workbook.close() treats a path-opened package in the POI version
        // in use before adding an explicit close.
        XSSFSheet sheet = new XSSFWorkbook(inputFile).getSheet(excelSheet);
        if (sheet == null) {
            throw new IllegalArgumentException(
                    "Sheet '" + excelSheet + "' not found in workbook " + inputFile);
        }
        this.excelSheet = sheet;
    }

    /**
     * Sets the minimum number of columns emitted for every row; shorter rows are padded
     * with empty strings.
     */
    public void setMinRows(int minRows) {
        this.minRows = minRows;
    }

    /** Sets the string inserted in front of separators that occur inside string cells. */
    public void setEscapeStr(String escapeStr) {
        this.escapeStr = escapeStr;
    }

    /** Sets the field separator used when escaping cells and when writing the output file. */
    public void setSeparatorStr(String separatorStr) {
        this.separatorStr = separatorStr;
    }

    /** Returns the converted rows; this is the internal list, not a copy. */
    public List<String[]> getArrayCSV() {
        return arrayCSV;
    }

    /**
     * Converts every row of the sheet into a String array and appends it to the row list.
     * A failure is reported on stderr and stops the conversion; rows converted so far are kept.
     */
    private void convertToCsv() {
        try {
            for (Row row : excelSheet) {
                // getLastCellNum() is -1 for a row without cells; never let the size go negative.
                int columnCount = Math.max(0, Math.max(row.getLastCellNum(), minRows));
                String[] rowArray = new String[columnCount];

                for (int column = 0; column < columnCount; column++) {
                    // Plain getCell() returns null for a missing cell, so no blank cells are
                    // created in the workbook just to be read back as "".
                    Cell cell = row.getCell(column);
                    rowArray[column] = (cell == null) ? "" : cellToString(cell, cell.getCellType());
                }

                arrayCSV.add(rowArray);
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Renders one cell as text according to {@code cellType}. Formula cells are rendered
     * from their cached result; blank, error and unknown types become an empty string.
     */
    private String cellToString(Cell cell, int cellType) {
        switch (cellType) {
            case Cell.CELL_TYPE_FORMULA:
                // The cached result may be a string or boolean, not only a number.
                return cellToString(cell, cell.getCachedFormulaResultType());
            case Cell.CELL_TYPE_BOOLEAN:
                return Boolean.toString(cell.getBooleanCellValue());
            case Cell.CELL_TYPE_NUMERIC:
                return isCellDateFormatted(cell)
                        ? dateFormat.format(cell.getDateCellValue())
                        : Double.toString(cell.getNumericCellValue());
            case Cell.CELL_TYPE_STRING:
                return cell.getStringCellValue()
                        .replace(separatorStr, escapeStr + separatorStr)
                        .replace("\n", " ");
            default:
                return "";
        }
    }

    /** Returns true when at least one cell of the record is non-empty. */
    private static boolean hasContent(String[] record) {
        return Joiner.on("").skipNulls().join(record).length() > 0;
    }

    /**
     * Writes all non-empty rows to {@code outputFile} on the configured file system,
     * one row per line, encoded as UTF-8. Failures are reported on stderr.
     */
    public void writeCsv() {
        try {
            FileSystem fs = FileSystem.get(new URI(defaultFS), new Configuration());
            Joiner joinComma = Joiner.on(separatorStr).skipNulls();

            // try-with-resources closes the stream even when a write fails.
            try (FSDataOutputStream outputStream = fs.create(new Path(outputFile))) {
                for (String[] record : arrayCSV) {
                    if (hasContent(record)) {
                        // writeBytes(String) would drop the high byte of every char.
                        outputStream.write((joinComma.join(record) + "\n").getBytes(StandardCharsets.UTF_8));
                    }
                }

                outputStream.hflush();
                outputStream.hsync();
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Command-line entry point: converts the sheet, writes the output file and echoes the
     * non-empty rows to stdout.
     *
     * @param args inputFile, outputFile, sheetName, defaultFS
     */
    public static void main(String[] args) throws IOException {
        if (args.length < 4) {
            throw new IllegalArgumentException(
                    "Usage: XlsxToCsv <inputFile> <outputFile> <sheetName> <defaultFS>");
        }
        String inputFile = args[0];
        String outputFile = args[1];
        String excelSheet = args[2];
        String defaultFS = args[3];

        XlsxToCsv processExcelFile = new XlsxToCsv(inputFile, outputFile, excelSheet, defaultFS);
        processExcelFile.convertToCsv();
        processExcelFile.writeCsv();

        Joiner joinComma = Joiner.on(",").skipNulls();

        for (String[] record : processExcelFile.getArrayCSV()) {
            if (hasContent(record)) {
                System.out.print(joinComma.join(record));
                System.out.print("\n");
            }
        }
    }
}

将 xlsx 转换为 csv 时,我遇到以下错误消息:

Caused by: java.lang.OutOfMemoryError: Java heap space  
at java.io.ByteArrayOutputStream.<init>(ByteArrayOutputStream.java:77)
at org.apache.poi.openxml4j.util.ZipInputStreamZipEntrySource$FakeZipEntry.<init>(ZipInputStreamZipEntrySource.java:123)
at org.apache.poi.openxml4j.util.ZipInputStreamZipEntrySource.<init>(ZipInputStreamZipEntrySource.java:57)
at org.apache.poi.openxml4j.opc.ZipPackage.<init>(ZipPackage.java:93)
at org.apache.poi.openxml4j.opc.OPCPackage.open(OPCPackage.java:278)
at org.apache.poi.util.PackageHelper.open(PackageHelper.java:37)
at org.apache.poi.xssf.usermodel.XSSFWorkbook.<init>(XSSFWorkbook.java:274)
at acvx.agg.qwuer.excel.XlsxToCsv.<init>(XlsxToCsv.java:43)
at acvx.agg.qwuer.excel.XlsxToCsv.main(XlsxToCsv.java:135)

最佳答案

一些建议:1)我怀疑您是逐行读取 Excel 的,但某些行之后可能已经没有数据,而您的程序似乎没有对此进行检查。2)建议先修改程序,只处理前 50 行;如果您的 Excel 有 250 行,这样可以判断程序能否正常转换这一部分。3)如果上一步成功,请尝试把读取和写入放在同一个循环中。目前您是先把所有内容读入数组,再调用下一个函数一次性写出全部内容。建议将其拆分,例如每读 10 行就写 10 行,并在写完之后将变量置为 null,以便垃圾回收器回收。4)在 Eclipse 中可增加的堆大小取决于机器的总内存,无法超过机器的物理上限,因此我同意仅增大堆不一定能解决您的问题。

关于Java.lang.OutOfMemoryError : JAVA HEAP SPACE while converting xlsx to csv format. 我尝试了这个没有运气 <java-opts>-Xms400m -Xmx4096m</java-opts>,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/59930426/

26 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com