gpt4 book ai didi

Java:Apache Poi 的 Excel 到 csv 日期转换问题

转载 作者:行者123 更新时间:2023-12-01 21:19:20 28 4
gpt4 key购买 nike

当使用下面的实用程序将大型 Excel 文件转换为 csv 时,由于 Excel 单元格格式定义为 *format,某些日期值转换不正确。

代码位置:https://svn.apache.org/repos/asf/poi/trunk/src/examples/src/org/apache/poi/xssf/eventusermodel/XLSX2CSV.java

/* ====================================================================
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */

package org.apache.poi.xssf.eventusermodel;

import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.PrintStream;

import javax.xml.parsers.ParserConfigurationException;

import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.openxml4j.opc.PackageAccess;
import org.apache.poi.ss.usermodel.DataFormatter;
import org.apache.poi.ss.util.CellAddress;
import org.apache.poi.ss.util.CellReference;
import org.apache.poi.util.SAXHelper;
import org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler.SheetContentsHandler;
import org.apache.poi.xssf.extractor.XSSFEventBasedExcelExtractor;
import org.apache.poi.xssf.model.StylesTable;
import org.apache.poi.xssf.usermodel.XSSFComment;
import org.xml.sax.ContentHandler;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;

/**
* A rudimentary XLSX -> CSV processor modeled on the
* POI sample program XLS2CSVmra from the package
* org.apache.poi.hssf.eventusermodel.examples.
* As with the HSSF version, this tries to spot missing
* rows and cells, and output empty entries for them.
* <p/>
* Data sheets are read using a SAX parser to keep the
* memory footprint relatively small, so this should be
* able to read enormous workbooks. The styles table and
* the shared-string table must be kept in memory. The
* standard POI styles table class is used, but a custom
* (read-only) class is used for the shared string table
* because the standard POI SharedStringsTable grows very
* quickly with the number of unique strings.
* <p/>
* For a more advanced implementation of SAX event parsing
* of XLSX files, see {@link XSSFEventBasedExcelExtractor}
* and {@link XSSFSheetXMLHandler}. Note that for many cases,
* it may be possible to simply use those with a custom
* {@link SheetContentsHandler} and no SAX code needed of
* your own!
*/
public class XLSX2CSV {
/**
* Uses the XSSF Event SAX helpers to do most of the work
* of parsing the Sheet XML, and outputs the contents
* as a (basic) CSV.
*/
private class SheetToCSV implements SheetContentsHandler {
private boolean firstCellOfRow = false;
private int currentRow = -1;
private int currentCol = -1;

private void outputMissingRows(int number) {
for (int i=0; i<number; i++) {
for (int j=0; j<minColumns; j++) {
output.append(',');
}
output.append('\n');
}
}

@Override
public void startRow(int rowNum) {
// If there were gaps, output the missing rows
outputMissingRows(rowNum-currentRow-1);
// Prepare for this row
firstCellOfRow = true;
currentRow = rowNum;
currentCol = -1;
}

@Override
public void endRow(int rowNum) {
// Ensure the minimum number of columns
for (int i=currentCol; i<minColumns; i++) {
output.append(',');
}
output.append('\n');
}

@Override
public void cell(String cellReference, String formattedValue,
XSSFComment comment) {
if (firstCellOfRow) {
firstCellOfRow = false;
} else {
output.append(',');
}

// gracefully handle missing CellRef here in a similar way as XSSFCell does
if(cellReference == null) {
cellReference = new CellAddress(currentRow, currentCol).formatAsString();
}

// Did we miss any cells?
int thisCol = (new CellReference(cellReference)).getCol();
int missedCols = thisCol - currentCol - 1;
for (int i=0; i<missedCols; i++) {
output.append(',');
}
currentCol = thisCol;

// Number or string?
try {
//noinspection ResultOfMethodCallIgnored
Double.parseDouble(formattedValue);
output.append(formattedValue);
} catch (NumberFormatException e) {
output.append('"');
output.append(formattedValue);
output.append('"');
}
}

@Override
public void headerFooter(String text, boolean isHeader, String tagName) {
// Skip, no headers or footers in CSV
}
}


///////////////////////////////////////

private final OPCPackage xlsxPackage;

/**
* Number of columns to read starting with leftmost
*/
private final int minColumns;

/**
* Destination for data
*/
private final PrintStream output;

/**
* Creates a new XLSX -> CSV converter
*
* @param pkg The XLSX package to process
* @param output The PrintStream to output the CSV to
* @param minColumns The minimum number of columns to output, or -1 for no minimum
*/
public XLSX2CSV(OPCPackage pkg, PrintStream output, int minColumns) {
this.xlsxPackage = pkg;
this.output = output;
this.minColumns = minColumns;
}

/**
* Parses and shows the content of one sheet
* using the specified styles and shared-strings tables.
*
* @param styles The table of styles that may be referenced by cells in the sheet
* @param strings The table of strings that may be referenced by cells in the sheet
* @param sheetInputStream The stream to read the sheet-data from.

* @exception java.io.IOException An IO exception from the parser,
* possibly from a byte stream or character stream
* supplied by the application.
* @throws SAXException if parsing the XML data fails.
*/
public void processSheet(
StylesTable styles,
ReadOnlySharedStringsTable strings,
SheetContentsHandler sheetHandler,
InputStream sheetInputStream) throws IOException, SAXException {
DataFormatter formatter = new DataFormatter();
InputSource sheetSource = new InputSource(sheetInputStream);
try {
XMLReader sheetParser = SAXHelper.newXMLReader();
ContentHandler handler = new XSSFSheetXMLHandler(
styles, null, strings, sheetHandler, formatter, false);
sheetParser.setContentHandler(handler);
sheetParser.parse(sheetSource);
} catch(ParserConfigurationException e) {
throw new RuntimeException("SAX parser appears to be broken - " + e.getMessage());
}
}

/**
* Initiates the processing of the XLS workbook file to CSV.
*
* @throws IOException If reading the data from the package fails.
* @throws SAXException if parsing the XML data fails.
*/
public void process() throws IOException, OpenXML4JException, SAXException {
ReadOnlySharedStringsTable strings = new ReadOnlySharedStringsTable(this.xlsxPackage);
XSSFReader xssfReader = new XSSFReader(this.xlsxPackage);
StylesTable styles = xssfReader.getStylesTable();
XSSFReader.SheetIterator iter = (XSSFReader.SheetIterator) xssfReader.getSheetsData();
int index = 0;
while (iter.hasNext()) {
InputStream stream = iter.next();
String sheetName = iter.getSheetName();
this.output.println();
this.output.println(sheetName + " [index=" + index + "]:");
processSheet(styles, strings, new SheetToCSV(), stream);
stream.close();
++index;
}
}

public static void main(String[] args) throws Exception {
if (args.length < 1) {
System.err.println("Use:");
System.err.println(" XLSX2CSV <xlsx file> [min columns]");
return;
}

File xlsxFile = new File(args[0]);
if (!xlsxFile.exists()) {
System.err.println("Not found or not a file: " + xlsxFile.getPath());
return;
}

int minColumns = -1;
if (args.length >= 2)
minColumns = Integer.parseInt(args[1]);

// The package open is instantaneous, as it should be.
OPCPackage p = OPCPackage.open(xlsxFile.getPath(), PackageAccess.READ);
XLSX2CSV xlsx2csv = new XLSX2CSV(p, System.out, minColumns);
xlsx2csv.process();
p.close();
}
}

Input format Image - Click here to see the formatting for dates

Input:

date1        date2
1/1/1900 1/1/1900
2/28/2012 2/28/2012
3/15/1965 3/15/1965
1/1/2000 1/1/2000
1/1/2100 1/1/2100
1/1/2115 1/1/2115

Output:

date1        date2
1/1/2000 1/1/1900
2/28/2012 2/28/2012
3/15/1965 3/15/1965
1/1/2000 1/1/2000
1/1/2000 1/1/2100
1/1/2015 1/1/2115

如果您查看输入数据,单元格(即 date1 列)的格式为星号,该星号利用区域设置,具有该格式的单元格会受到影响,因为 1900 会转换为 2000,因此任何高于 2099 的数据也会受到影响...如果单元格(即 Date2 列)的格式不带 *,则值将按预期输出。这是该实用程序的限制还是有解决方法?

如果我以其他方式格式化单元格而不应用区域设置格式,我会得到正确的输出。

最佳答案

无法使用 apache poi 3.15 Final 重现该行为。

您显示的代码会产生以下输出:

Sheet1 [index=0]:
"date1","date2"
"1/1/00","1/1/1900"
"2/28/12","2/28/2012"
"3/15/65","3/15/1965"
"1/1/00","1/1/2000"
"1/1/00","1/1/2100"
"1/1/15","1/1/2115"

因此,对于默认日期格式(format-id = 0xe = 短日期 = *3/14/2012),使用m/d/yy。这是 BuiltinFormats 中定义的。 .

当然,如果您将该输出直接放入 CSV 文件中,则不存在世纪,Excel 在打开此 CSV 文件时必须猜测世纪。

您可以稍微更改一下代码,例如:

...
public void processSheet(
StylesTable styles,
ReadOnlySharedStringsTable strings,
SheetContentsHandler sheetHandler,
InputStream sheetInputStream) throws IOException, SAXException {
DataFormatter formatter = new DataFormatter() {
@Override
public String formatRawCellContents(double value, int formatIndex, String formatString, boolean use1904Windowing) {
if ("m/d/yy".equals(formatString)) formatString = "m/d/yyyy";
return super.formatRawCellContents(value, formatIndex, formatString, use1904Windowing);
}
};
InputSource sheetSource = new InputSource(sheetInputStream);
try {
XMLReader sheetParser = SAXHelper.newXMLReader();
ContentHandler handler = new XSSFSheetXMLHandler(
styles, null, strings, sheetHandler, formatter, false);
sheetParser.setContentHandler(handler);
sheetParser.parse(sheetSource);
} catch(ParserConfigurationException e) {
throw new RuntimeException("SAX parser appears to be broken - " + e.getMessage());
}
}
...

这将产生以下输出:

Sheet1 [index=0]:
"date1","date2"
"1/1/1900","1/1/1900"
"2/28/2012","2/28/2012"
"3/15/1965","3/15/1965"
"1/1/2000","1/1/2000"
"1/1/2100","1/1/2100"
"1/1/2115","1/1/2115"

关于Java:Apache Poi 的 Excel 到 csv 日期转换问题,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/42259859/

28 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com