gpt4 book ai didi

java - 如何避免跳过 Apache POI 中的空白行或列

转载 作者:行者123 更新时间:2023-11-29 04:23:19 25 4
gpt4 key购买 nike

当我使用 Apace POI 解析文件时,空行被跳过并返回一个包含非空行的字符串数组列表

我如何告诉 API,不要跳过读取空行或空列?

要读取的代码以某种方式跳过了没有数据的行。

XExcelFileReader fileReader = new XExcelFileReader(excelByteArray, sheetFromWhichToRead);

dataFromFile = fileReader.readRows();

用于从类 XExcelFileReader 中读取数据。变量 dataFromFile 是一个字符串数组列表。

这是读取数据行的代码:

    import java.io.ByteArrayInputStream;
import java.io.InputStream;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.regex.Pattern;

import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.XMLStreamReader;

import org.apache.commons.lang.StringUtils;
import org.apache.poi.hssf.usermodel.HSSFDateUtil;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.ss.util.CellReference;
import org.apache.poi.xssf.eventusermodel.ReadOnlySharedStringsTable;
import org.apache.poi.xssf.eventusermodel.XSSFReader;
import org.apache.poi.xssf.model.StylesTable;
import org.apache.poi.xssf.usermodel.XSSFCellStyle;
import org.apache.poi.xssf.usermodel.XSSFRichTextString;

/**
* This class is responsible for reading the row contents for a given byte array and sheet name
*/
public class XExcelFileReader {
/**
* This is the Row Num of the current row that was read
*/
private int rowNum = 0;
/**
* The OPCPackage is the package used to laod the Input Stream used for only .xlsx Files
*/
private OPCPackage opcPkg;
/**
* These are the String Tables that are read from the Excel File
*/
private ReadOnlySharedStringsTable stringsTable;
/**
* The XML Streaming API will be used
*/
private XMLStreamReader xmlReader;

/** The styles table which has formatting information about cells. */
private StylesTable styles;

/**
* @param excelByteArray the excel byte array
* @param sheetFromWhichToRead the excel sheet from which to read
* @throws Exception the exception
*/
public XExcelFileReader(final byte[] excelByteArray, final String sheetFromWhichToRead) throws Exception {

InputStream excelStream = new ByteArrayInputStream(excelByteArray);
opcPkg = OPCPackage.open(excelStream);
this.stringsTable = new ReadOnlySharedStringsTable(opcPkg);

XSSFReader xssfReader = new XSSFReader(opcPkg);
styles = xssfReader.getStylesTable();
XMLInputFactory factory = XMLInputFactory.newInstance();

XSSFReader.SheetIterator iter = (XSSFReader.SheetIterator) xssfReader.getSheetsData();
InputStream inputStream = null;
while (iter.hasNext()) {
inputStream = iter.next();
String tempSheetName = iter.getSheetName();
if (StringUtils.isNotEmpty(tempSheetName)) {
tempSheetName = tempSheetName.trim();
if (tempSheetName.equals(sheetFromWhichToRead)) {
break;
}
}
}
xmlReader = factory.createXMLStreamReader(inputStream);

while (xmlReader.hasNext()) {
xmlReader.next();
if (xmlReader.isStartElement()) {
if (xmlReader.getLocalName().equals("sheetData")) {
break;
}
}
}
}

/**
* @return rowNum
*/
public final int rowNum() {
return rowNum;
}

/**
* @return List<String[]> List of String array which can hold the content
* @throws XMLStreamException the XMLStreamException
*/
public final List<String[]> readRows() throws XMLStreamException {
String elementName = "row";
List<String[]> dataRows = new ArrayList<String[]>();
while (xmlReader.hasNext()) {
xmlReader.next();
if (xmlReader.isStartElement()) {
if (xmlReader.getLocalName().equals(elementName)) {
rowNum++;
dataRows.add(getDataRow());
// TODO need to see if the batch Size is required
// if (dataRows.size() == batchSize)
// break;
}
}
}

return dataRows;
}

/**
* @return String [] of Row Data
* @throws XMLStreamException the XMLStreamException
*/
private String[] getDataRow() throws XMLStreamException {
List<String> rowValues = new ArrayList<String>();
while (xmlReader.hasNext()) {
xmlReader.next();
if (xmlReader.isStartElement()) {
if (xmlReader.getLocalName().equals("c")) {
CellReference cellReference = new CellReference(xmlReader.getAttributeValue(null, "r"));
// Fill in the possible blank cells!
while (rowValues.size() < cellReference.getCol()) {
rowValues.add("");
}
String cellType = xmlReader.getAttributeValue(null, "t");
String cellStyleStr = xmlReader.getAttributeValue(null, "s");
rowValues.add(getCellValue(cellType, cellStyleStr));
}
} else if (xmlReader.isEndElement() && xmlReader.getLocalName().equals("row")) {
break;
}
}
return rowValues.toArray(new String[rowValues.size()]);
}

/**
* @param cellType the cell type
* @param cellStyleStr the cell style
* @return cell content the cell value
* @throws XMLStreamException the XMLStreamException
*/
private String getCellValue(final String cellType, final String cellStyleStr) throws XMLStreamException {
String value = ""; // by default
while (xmlReader.hasNext()) {
xmlReader.next();
if (xmlReader.isStartElement()) {
if (xmlReader.getLocalName().equals("v")) {

if (cellType != null && cellType.equals("s")) {
int idx = Integer.parseInt(xmlReader.getElementText());
String s = stringsTable.getEntryAt(idx);
return new XSSFRichTextString(s).toString();
}

if (cellStyleStr != null) {
String cellValue = xmlReader.getElementText();
int styleIndex = Integer.parseInt(cellStyleStr);
XSSFCellStyle style = styles.getStyleAt(styleIndex);
short formatIndex = style.getDataFormat();
if (!isValidDouble(cellValue)) {
return cellValue;
}
double doubleVal = Double.valueOf(cellValue);
boolean isValidExcelDate = HSSFDateUtil.isInternalDateFormat(formatIndex);
Date date = null;
if (isValidExcelDate) {
date = HSSFDateUtil.getJavaDate(doubleVal);
String dateStr = dateAsString(date);
return dateStr;
}
if (!(doubleVal == Math.floor(doubleVal))) {
return Double.toString(doubleVal);
}
return cellValue;
}

else {
return xmlReader.getElementText();
}
}
} else if (xmlReader.isEndElement() && xmlReader.getLocalName().equals("c")) {
break;
}
}
return value;
}

/**
* To check whether the incoming value can be used in the Double utility method Double.valueOf() to prevent
* NumberFormatException.
* @param stringVal - String to be validated.
* @return - true if it is a valid String which can be passed into Double.valueOf method. <br/> For more information
* refer- <a>https://docs.oracle.com/javase/7/docs/api/java/lang/Double.html#valueOf(java.lang.String)</a>
*/
private boolean isValidDouble(String stringVal) {
final String Digits = "(\\p{Digit}+)";
final String HexDigits = "(\\p{XDigit}+)";
// an exponent is 'e' or 'E' followed by an optionally
// signed decimal integer.
final String Exp = "[eE][+-]?" + Digits;
final String fpRegex = ("[\\x00-\\x20]*" + // Optional leading "whitespace"
"[+-]?(" + // Optional sign character
"NaN|" + // "NaN" string
"Infinity|" + // "Infinity" string

// A decimal floating-point string representing a finite positive
// number without a leading sign has at most five basic pieces:
// Digits . Digits ExponentPart FloatTypeSuffix
//
// Since this method allows integer-only strings as input
// in addition to strings of floating-point literals, the
// two sub-patterns below are simplifications of the grammar
// productions from the Java Language Specification, 2nd
// edition, section 3.10.2.

// Digits ._opt Digits_opt ExponentPart_opt FloatTypeSuffix_opt
"(((" + Digits + "(\\.)?(" + Digits + "?)(" + Exp + ")?)|" +

// . Digits ExponentPart_opt FloatTypeSuffix_opt
"(\\.(" + Digits + ")(" + Exp + ")?)|" +

// Hexadecimal strings
"((" +
// 0[xX] HexDigits ._opt BinaryExponent FloatTypeSuffix_opt
"(0[xX]" + HexDigits + "(\\.)?)|" +

// 0[xX] HexDigits_opt . HexDigits BinaryExponent FloatTypeSuffix_opt
"(0[xX]" + HexDigits + "?(\\.)" + HexDigits + ")" +

")[pP][+-]?" + Digits + "))" + "[fFdD]?))" + "[\\x00-\\x20]*");// Optional trailing "whitespace"

if (Pattern.matches(fpRegex, stringVal))
return true;
else {
return false;
}

}

/**
* Date as string.
* @param date the date
* @return the string
*/
public static String dateAsString(final Date date) {
String dateAsString = null;
if (date != null) {
SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
dateAsString = sdf.format(date);
}
return dateAsString;
}

@Override
protected final void finalize() throws Throwable {
if (opcPkg != null) {
opcPkg.close();
}
super.finalize();
}

}

我希望空行也成为字符串数组列表的一部分。只是字符串数组将为空。

如果在 Excel 中是这样的:

第一行

<<没有数据>>

第三排

那么列表应该是

dataFromFile
0 ->[First Row]
1 ->[]
2 ->[Third Row]

最佳答案

这是 Busy Developers Guide 的内容在 POI 网站上说,阅读表中有缺失的行:

for (int rowNum = rowStart; rowNum < rowEnd; rowNum++) {
Row r = sheet.getRow(rowNum);
if (r == null) {
// This whole row is empty
// Handle it as needed
continue;
}

int lastColumn = Math.max(r.getLastCellNum(), MY_MINIMUM_COLUMN_COUNT);

for (int cn = 0; cn < lastColumn; cn++) {
Cell c = r.getCell(cn, Row.RETURN_BLANK_AS_NULL);
if (c == null) {
// The spreadsheet is empty in this cell
} else {
// Do something useful with the cell's contents
}
}
}

你真的可以在那里得到很多答案。您永远不必直接解析 XML。即使是高级用户模型中缺少的东西,通常也可以通过 XMLBeans 生成的 CT 类来获取它。

关于java - 如何避免跳过 Apache POI 中的空白行或列,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/47790569/

25 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com