gpt4 book ai didi

java - TSV 文件转换为 RDF

转载 作者:行者123 更新时间:2023-12-02 03:44:30 29 4
gpt4 key购买 nike

我想使用 Java 代码中的 Apache Jena 的库将 tsv 文件转换为 rdf 文件。我找到了一个从 csv 转换为 rdf 的示例,但它对我没有多大帮助。链接为:http://www.essi.upc.edu/dtim/blog/post/enter-the-world-of-semantics-using-jena-to-convert-your-data-to-rdf

你能给我一些想法吗?多谢!我应该对给定的代码进行哪些更改?

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.PrintWriter;
import java.util.Scanner;

import org.apache.commons.io.FileUtils;
import org.apache.jena.graph.Node;
import org.apache.jena.graph.NodeFactory;
import org.apache.jena.graph.Triple;
import org.apache.jena.propertytable.graph.GraphCSV;
import org.apache.jena.propertytable.lang.CSV2RDF;
import org.apache.jena.query.Query;
import org.apache.jena.query.QueryExecution;
import org.apache.jena.query.QueryExecutionFactory;
import org.apache.jena.query.QueryFactory;
import org.apache.jena.query.QuerySolution;
import org.apache.jena.query.ResultSet;
import org.apache.jena.rdf.model.Model;
import org.apache.jena.rdf.model.ModelFactory;
import org.apache.jena.rdf.model.RDFNode;
import org.apache.jena.rdf.model.ResourceFactory;
import org.apache.jena.rdf.model.Statement;
import org.apache.jena.util.FileManager;
import org.apache.jena.vocabulary.RDF;

public static void convertCSVToRDF (String file,
String inputFilename, String outputFilename,String outputType) {

//Just a few lines below to convert the data from CSV to an RDF graph,
see how easy?!

CSV2RDF.init();//Initialise the CSV conversion engine in Jena

GraphCSV newGraph = new GraphCSV(inputFilename);

Model model = ModelFactory.createModelForGraph(newGraph);



//Manually insert class triples for each instance in the CSV file

String sparqlQueryString = "select distinct ?s where {?s ?p ?o}";

Query query = QueryFactory.create(sparqlQueryString);

QueryExecution qexec = QueryExecutionFactory.create(sparqlQueryString, model);

ResultSet s = qexec.execSelect();

Model m2 = ModelFactory.createDefaultModel();

while(s.hasNext()) {

QuerySolution so = s.nextSolution();

Triple t = new Triple(so.getResource("s").asNode(),RDF.type.asNode(),

NodeFactory.createBlankNode(file));

Statement stmt = ResourceFactory.createStatement(so.getResource("s"), RDF.type,

ResourceFactory.createResource(file));

m2.add(stmt);

}

Model m3 = ModelFactory.createUnion(model, m2); //create a new RDF graph which "unions"

//the old graph with the new graph containing

//the new rows



//Now serialize the RDF graph to an output file using the outputType input variable

you specify. It should be “N-Triple” in our case.

try {

FileWriter out = new FileWriter(outputFilename);

m3.write(out,outputType);

} catch (Exception e) {

System.out.println("Error in the file output process!");

e.printStackTrace();

}



//Delete specific triples of a specific predicate called ¨row¨

File output = new File(outputFilename);

File tempFile = new File("C:/Users/user1/SampleFile/temp.nt");

BufferedReader reader = null;

BufferedWriter writer = null;

try {

reader = new BufferedReader(new FileReader(output));

writer = new BufferedWriter(new FileWriter(tempFile));

String currentLine;

//Delete triples from the old file by skipping it while reading the input N-Triple

file from the last step, otherwise write the triple to a new temp file!

while ((currentLine = reader.readLine()) != null) {

if (currentLine.contains("http://w3c/future-csv-vocab/row")) {

continue;

} else {

writer.write(currentLine);

writer.newLine();

}

}

writer.close();

reader.close();



PrintWriter printer = new PrintWriter(output);

printer.print("");

printer.close();



//copy content from temp file to final output file, overwriting it.

FileUtils.copyFile(tempFile, output);

} catch (FileNotFoundException e1) {

// TODO Auto-generated catch block

e1.printStackTrace();

} catch (IOException e) {

// TODO Auto-generated catch block

e.printStackTrace();

}

}

最佳答案

Jena 中没有将 TSV 转换为 RDF 的内置方法,因为 TSV(和 CSV 一样)并不是 RDF 的一种格式,而是适用于任何以表格形式表示的数据。也许正是出于这个原因,Jena 移除了对 CSV 的支持(jena-csv 的最后一个版本是 3.9.0)。

不过 TSV(tab-separated values,制表符分隔值)是一种非常简单的格式。将 TSV 数据转换为 RDF 是一项非常简单的任务(大约 10 分钟的编码工作)。您可以用 TSV 数据做任何想做的事情,例如,可以把整个 TSV 内容作为一个巨大的文字(literal)添加进去。

但以下方式演示了一种合理的方法,其中每一行都是具有数据属性断言的 OWL 个体。

    String tsv = "Sepal length\tSepal width\tPetal length\tPetal width\tSpecies\n" +
"5.1\t3.5\t1.4\t0.2\tI. setosa\n" +
"4.9\t3.0\t1.4\t0.2\tI. setosa";

Charset ch = StandardCharsets.UTF_8;
String separator = "\t";
String ns = "http://ex#";
UnaryOperator<String> nameToURI = s -> ns + s.toLowerCase().replace(" ", "_");

Model m = ModelFactory.createDefaultModel()
.setNsPrefixes(PrefixMapping.Standard)
.setNsPrefix("ex", ns);
Resource clazz = m.createResource(ns + "MyClass", OWL.Class);

try (InputStream is = new ByteArrayInputStream(tsv.getBytes(ch));
Reader r = new InputStreamReader(is, ch);
BufferedReader br = new BufferedReader(r)) {
String first = br.lines().findFirst().orElseThrow(IllegalArgumentException::new);
List<Property> props = Arrays.stream(first.split(separator))
.map(s -> m.createResource(nameToURI.apply(s), OWL.DatatypeProperty)
.addProperty(RDFS.label, s).as(Property.class))
.collect(Collectors.toList());
br.lines().forEach(line -> {
String[] data = line.split(separator);
if (data.length != props.size()) throw new IllegalArgumentException();
Resource individual = m.createResource(clazz);
for (int i = 0; i < data.length; i++) {
individual.addProperty(props.get(i), data[i]);
}
});
}
m.write(System.out, "ttl");

输出:

@prefix ex:    <http://ex#> .
@prefix owl: <http://www.w3.org/2002/07/owl#> .
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix dc: <http://purl.org/dc/elements/1.1/> .

ex:MyClass a owl:Class .

ex:sepal_width a owl:DatatypeProperty ;
rdfs:label "Sepal width" .

ex:species a owl:DatatypeProperty ;
rdfs:label "Species" .

ex:sepal_length a owl:DatatypeProperty ;
rdfs:label "Sepal length" .

ex:petal_length a owl:DatatypeProperty ;
rdfs:label "Petal length" .

ex:petal_width a owl:DatatypeProperty ;
rdfs:label "Petal width" .

[ a ex:MyClass ;
ex:petal_length "1.4" ;
ex:petal_width "0.2" ;
ex:sepal_length "5.1" ;
ex:sepal_width "3.5" ;
ex:species "I. setosa"
] .

[ a ex:MyClass ;
ex:petal_length "1.4" ;
ex:petal_width "0.2" ;
ex:sepal_length "4.9" ;
ex:sepal_width "3.0" ;
ex:species "I. setosa"
] .

关于java - TSV 文件转换为 RDF,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/56816846/

29 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com