gpt4 book ai didi

java - 无法使用xslt从xml获取所需数据到csv

转载 作者:行者123 更新时间:2023-12-02 12:20:19 27 4
gpt4 key购买 nike

我正在尝试将数据从 XML 加载到 csv,但缺少一些节点数据。下面是我的Java代码,

import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.InputStream;
import java.io.OutputStream;
import java.text.ParseException;

import javax.xml.namespace.QName;
import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.XMLStreamReader;
import javax.xml.stream.util.StreamReaderDelegate;
import javax.xml.transform.Result;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.stax.StAXSource;
import javax.xml.transform.stream.StreamResult;
import javax.xml.transform.stream.StreamSource;


/**
 * Command-line tool that feeds an XML file through an XSLT stylesheet using a StAX reader and
 * writes the transformation result to an output file, reporting how many elements with a given
 * tag name were seen along the way.
 */
public class XML2CSV {

    /**
     * Transforms the XML read from {@code is} with {@code transformer}, writing the result to
     * {@code os}, and counts the start tags whose qualified name equals {@code name}.
     *
     * <p>The transformer consumes the StAX reader while it runs, so counting has to happen as the
     * events are pulled; a loop over the reader after the transformation would find nothing left
     * to count. Events the transformer did not consume are drained afterwards so they are counted
     * too.
     *
     * <p>Neither {@code is} nor {@code os} is closed by this method; the StAX reader created here
     * is closed before returning.
     *
     * @param is          XML input
     * @param os          destination of the transformation result
     * @param transformer transformer applied to the whole input
     * @param name        qualified name of the elements to count
     * @return number of start tags named {@code name} encountered in the input
     * @throws XMLStreamException   if the input cannot be read as XML
     * @throws TransformerException if the transformation fails
     */
    public static int transform(InputStream is, OutputStream os, Transformer transformer, QName name)
            throws XMLStreamException, TransformerException {
        long time1 = System.nanoTime();

        XMLInputFactory factory = XMLInputFactory.newInstance();
        factory.setProperty(XMLInputFactory.IS_NAMESPACE_AWARE, true);
        factory.setProperty(XMLInputFactory.IS_VALIDATING, false);
        XMLStreamReader reader = factory.createXMLStreamReader(is);
        try {
            CountingReader counting = new CountingReader(reader, name, time1);
            Result result = new StreamResult(os);
            transformer.transform(new StAXSource(counting), result);

            // Pull whatever the transformer left unread through the same counter.
            while (counting.hasNext()) {
                counting.next();
            }

            long time2 = System.nanoTime();
            double ms = (time2 - time1) / 1000000.0;
            System.out.format("Total Time=%.2fms Total rows=%d%n", ms, counting.rows);
            return counting.rows;
        } finally {
            // Closing the reader releases parser resources; it does not close the input stream.
            reader.close();
        }
    }

    /**
     * Entry point. Parses the command line, loads the stylesheet and runs {@link #transform} on
     * the input file. Exits with status 1 on a usage error or when an input file is missing.
     *
     * @param arg command-line arguments: {@code -f}, {@code -o}, {@code -x}, {@code -t} and the
     *            optional {@code -n}
     * @throws Exception if the stylesheet cannot be compiled or the transformation or I/O fails
     */
    public static void main(String[] arg) throws Exception {
        File xsltFile;
        File inputFile;
        File outputFile;
        String tagName;
        String namespace;
        try {
            String xsltFileName = parse("-x", arg, "XSLT sheet", true);
            String inputFileName = parse("-f", arg, "Input file", true);
            String outputFileName = parse("-o", arg, "Output file", true);
            tagName = parse("-t", arg, "Tag name", true);
            namespace = parse("-n", arg, "Tag Namespace URL", false);
            xsltFile = new File(xsltFileName);
            inputFile = new File(inputFileName);
            outputFile = new File(outputFileName);
        } catch (ParseException e) {
            System.err.println(e.getMessage());
            System.err.println("Syntax: XML2CSV -f <input file> -o <output file> -x <XSLT stylesheet> -t <Tag name> [-n <namespace URL>]");
            System.err.println("Will split given file on given tag with given namespace.");
            System.err.println("Will process contents of each tag using given XSLT.");
            System.exit(1);
            return; // keeps the compiler satisfied that the locals above are assigned
        }
        if (!xsltFile.exists()) {
            System.err.println("File not found " + xsltFile.getAbsolutePath());
            System.exit(1);
        }
        if (!inputFile.exists()) {
            System.err.println("File not found " + inputFile.getAbsolutePath());
            System.exit(1);
        }

        // Compile the XSLT stylesheet.
        StreamSource stylesource = new StreamSource(xsltFile);
        Transformer transformer = TransformerFactory.newInstance().newTransformer(stylesource);

        // Qualified name of the elements that are counted as rows.
        final QName name;
        if (namespace != null) {
            name = new QName(namespace, tagName);
        } else {
            name = new QName(tagName);
        }
        System.out.println("Will look for tag " + name + " in namespace " + namespace);

        // Both streams are closed automatically, output first, even if the transformation fails.
        try (FileInputStream fis = new FileInputStream(inputFile);
                FileOutputStream fos = new FileOutputStream(outputFile)) {
            transform(fis, fos, transformer, name);
        }
    }

    /**
     * Looks up the value following {@code option} on the command line.
     *
     * @param option   option flag to search for, e.g. {@code -f}
     * @param arg      command-line arguments
     * @param desc     human-readable option description used in the error message
     * @param required whether a missing option is an error
     * @return the trimmed argument following the first occurrence of {@code option}, or
     *         {@code null} when the option is absent and not required
     * @throws ParseException if the option is the last argument, or is required but absent
     */
    private static String parse(String option, String[] arg, String desc, boolean required) throws ParseException {
        for (int i = 0; i < arg.length; i++) {
            if (option.equals(arg[i])) {
                if (i + 1 < arg.length) {
                    return arg[i + 1].trim();
                }
                throw new ParseException(option + " must be followed by an argument", i);
            }
        }
        if (required) {
            throw new ParseException(desc + " is required", 0);
        }
        return null;
    }

    /**
     * Reader wrapper that counts matching start tags as a consumer advances through the stream
     * and prints a progress line every 1000 matches.
     */
    private static final class CountingReader extends StreamReaderDelegate {
        private final QName rowName;
        private final long startNanos;
        private int rows;

        CountingReader(XMLStreamReader delegate, QName rowName, long startNanos) {
            super(delegate);
            this.rowName = rowName;
            this.startNanos = startNanos;
        }

        @Override
        public int next() throws XMLStreamException {
            int event = super.next();
            tally(event);
            return event;
        }

        @Override
        public int nextTag() throws XMLStreamException {
            // The base class forwards nextTag() straight to the wrapped reader, bypassing next(),
            // so a start tag reached this way has to be counted here.
            int event = super.nextTag();
            tally(event);
            return event;
        }

        /** Counts the current event if it is a start tag with the configured name. */
        private void tally(int event) {
            if (event == XMLStreamReader.START_ELEMENT && rowName.equals(getName())) {
                rows++;
                if (rows % 1000 == 0) {
                    double ms = (System.nanoTime() - startNanos) / 1000000.0;
                    System.out.format("Time=%.2fms Rows=%d%n", ms, rows);
                }
            }
        }
    }
}

这是我的 XSL 文件,

<?xml version="1.0" encoding="UTF-8"?>
<!--
  Streamable XSLT 3.0 stylesheet that flattens a FIXML batch into CSV text:
  one header line, then one quoted, comma-separated line per x:Evnt element.
-->
<xsl:stylesheet version="3.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
xmlns:xs="http://www.w3.org/2001/XMLSchema"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xmlns:x="http://www.fixprotocol.org/FIXML-5-0-SP2"
xsi:schemaLocation="http://www.fixprotocol.org/FIXML-5-0-SP2 fixml-main-5-0-SP2_.xsd"
xmlns:math="http://www.w3.org/2005/xpath-functions/math"
exclude-result-prefixes="xs math">

<!-- The xs prefix is declared above because it is used both in as="xs:string?" below and in
     exclude-result-prefixes; using an undeclared prefix there is a static error. -->

<!-- Remembers the MktSegID attribute of the most recently started x:Batch/x:MktDef so that
     later x:Evnt rows can report it without navigating to a preceding sibling. -->
<xsl:accumulator name="MktSegID" streamable="yes" as="xs:string?" initial-value="()">
<xsl:accumulator-rule match="x:Batch/x:MktDef" select="string(@MktSegID)"/>
</xsl:accumulator>

<xsl:mode streamable="yes" use-accumulators="MktSegID"/>
<xsl:output method="text" encoding="utf-8" />

<!-- Field separator, field quote character and row terminator. -->
<xsl:param name="delim" select="','" />
<xsl:param name="quote" select="'&quot;'" />
<xsl:param name="break" select="'&#xA;'" />


<!-- Writes the header line, then processes every x:Evnt in the document. -->
<xsl:template match="/">
<xsl:text>PriSetPx,TxnTm,ID,Src,EventTyp,Dt,Exch,MktSegID </xsl:text>
<xsl:text>&#xA;</xsl:text>
<xsl:apply-templates select="descendant::x:Evnt"/>
</xsl:template>

<!-- Writes one CSV row per x:Evnt. -->
<xsl:template match="x:Evnt">
<!-- Attributes of the grandparent element (x:SecDef in the sample input). -->
<xsl:value-of select="concat($quote, normalize-space(../../@PriSetPx), $quote)" /><xsl:value-of select="$delim" />
<xsl:value-of select="concat($quote, normalize-space(../../@TxnTm), $quote)" /><xsl:value-of select="$delim" />
<!-- Attributes of the parent element (x:Instrmt in the sample input). -->
<xsl:value-of select="concat($quote, normalize-space(../@ID), $quote)" /><xsl:value-of select="$delim" />
<xsl:value-of select="concat($quote, normalize-space(../@Src), $quote)" /><xsl:value-of select="$delim" />

<!-- Attributes of the x:Evnt element itself. -->
<xsl:value-of select="concat($quote, normalize-space(@EventTyp), $quote)" /><xsl:value-of select="$delim" />
<xsl:value-of select="concat($quote, normalize-space(@Dt), $quote)" /><xsl:value-of select="$delim" />

<xsl:value-of select="concat($quote, normalize-space(../@Exch), $quote)" /><xsl:value-of select="$delim" />
<!-- Last column: no trailing delimiter, so each row has the same eight fields as the header. -->
<xsl:value-of select="concat($quote, accumulator-before('MktSegID'), $quote)" />
<xsl:value-of select="$break" />
</xsl:template>
</xsl:stylesheet>

这是我的示例 xml,

<?xml version="1.0" encoding="ISO-8859-1"?>
<FIXML xsi:schemaLocation="http://www.fixprotocol.org/FIXML-5-0-SP2 fixml-main-5-0-SP2_.xsd" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns="http://www.fixprotocol.org/FIXML-5-0-SP2" s="2012-04-23" v="FIX.5.0SP2">
<Batch ID="RPTTA111PUBLI20170509">
************ This is one set of loop************
<MktDef MktID="XEUR" MktSegID="19699" EfctvBizDt="2017-05-11" NxtEfctvBizDt="2017-05-15" MktSeg="FCEA" MarketSegmentDesc="FUT ON EUR AUD" Sym="DE000A160WW0" ParentMktSegmID="FCUR" Ccy="AUD" MktSegStat="10" USFirmFlag="Y" PartID="1">
<Undly Exch="XREU" Sym="CEA" ID="EU0009654748" Src="4" PrevClsPx="1.47"/>
</MktDef>

<SecDef PriSetPx="68708.52">
<Instrmt ID="221096" Src="M" SecTyp="FUT" Status="1" Exch="XLDX" ProdCmplx="1" CFI="FFMCSX" MatDt="2024-12-17" MMY="202412" Mult="1" ValMeth="FUT" SettlMeth="C" PxPrcsn="2" MinPxIncr="0.01" MinPxIncrAmt="0.01">
<Evnt EventTyp="7" Dt="2024-12-17"/>
</Instrmt>
</SecDef>
<SecDef>
.
.
.
</SecDef>
<SecDef>
.
.
.
</SecDef>
<SecDef>
.
.
.
</SecDef>
************ This is one set of loop************
############ This will continue n number of times having millions of line###########
</Batch>
</FIXML>

期望的输出应包含以下各列及其数据,但目前 @Exch 和 @MktSegID 两列的数据缺失:

PriSetPx TxnTm ID Src EventTyp Dt Exch MktSegID 

请帮助我理解我在 XSL 代码中哪里做错了以及如何获取列数据。

谢谢!

最佳答案

尝试设置一个累加器

<xsl:accumulator name="MktSegID" streamable="yes" as="xs:string?" initial-value="()">
<xsl:accumulator-rule match="x:Batch/x:MktDef" select="string(@MktSegID)"/>
</xsl:accumulator>

并在 xsl:mode 上添加 use-accumulators="MktSegID",即 <xsl:mode streamable="yes" use-accumulators="MktSegID"/>,然后把下面这一行

<xsl:value-of select="concat($quote, normalize-space(../../../@MktSegID), $quote)" />

使用

<xsl:value-of select="concat($quote, accumulator-before('MktSegID'), $quote)" />

正如评论中已经指出的,看起来你需要的不是 <xsl:value-of select="concat($quote, normalize-space(../../@Exch), $quote)" />,而是 <xsl:value-of select="concat($quote, normalize-space(../@Exch), $quote)" />。

关于java - 无法使用xslt从xml获取所需数据到csv,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/45833018/

27 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com