gpt4 book ai didi

java - 无法查询链接电影数据库的本地版本

转载 作者:太空宇宙 更新时间:2023-11-04 08:39:42 24 4
gpt4 key购买 nike

我正在尝试使用 SPARQL 查询链接电影数据库的本地版本。该文件为 N-Triples 格式,大小约为 450mb。我正在使用 servlet 来实现。现在,当我传递查询时,servlet 需要大约五分钟多的时间来处理它,最后我得到以下异常:

type Exception report

message

description The server encountered an internal error () that prevented it from fulfilling this request.

exception

javax.servlet.ServletException: Servlet execution threw an exception


root cause

java.lang.OutOfMemoryError: Java heap space
java.util.Arrays.copyOfRange(Arrays.java:3209)
java.lang.String.<init>(String.java:215)
java.lang.StringBuilder.toString(StringBuilder.java:430)
org.openjena.riot.tokens.TokenizerText.allBetween(TokenizerText.java:732)
org.openjena.riot.tokens.TokenizerText.parseToken(TokenizerText.java:152)
org.openjena.riot.tokens.TokenizerText.hasNext(TokenizerText.java:69)
org.openjena.atlas.iterator.PeekIterator.fill(PeekIterator.java:37)
org.openjena.atlas.iterator.PeekIterator.next(PeekIterator.java:77)
org.openjena.riot.lang.LangBase.nextToken(LangBase.java:145)
org.openjena.riot.lang.LangNTriples.parseOne(LangNTriples.java:59)
org.openjena.riot.lang.LangNTriples.parseOne(LangNTriples.java:21)
org.openjena.riot.lang.LangNTuple.runParser(LangNTuple.java:58)
org.openjena.riot.lang.LangBase.parse(LangBase.java:75)
org.openjena.riot.system.JenaReaderNTriples2.readWorker(JenaReaderNTriples2.java:28)
org.openjena.riot.system.JenaReaderRIOT.readImpl(JenaReaderRIOT.java:124)
org.openjena.riot.system.JenaReaderRIOT.read(JenaReaderRIOT.java:79)
com.hp.hpl.jena.rdf.model.impl.ModelCom.read(ModelCom.java:226)
com.hp.hpl.jena.util.FileManager.readModelWorker(FileManager.java:395)
com.hp.hpl.jena.util.FileManager.loadModelWorker(FileManager.java:299)
com.hp.hpl.jena.util.FileManager.loadModel(FileManager.java:250)
ServletExample.runQuery(ServletExample.java:92)
ServletExample.doGet(ServletExample.java:62)
javax.servlet.http.HttpServlet.service(HttpServlet.java:627)
javax.servlet.http.HttpServlet.service(HttpServlet.java:729)


note The full stack trace of the root cause is available in the Apache Tomcat/5.5.31 logs.

我的代码是:

import java.io.IOException;
import java.io.PrintWriter;

import javax.servlet.ServletException;
import javax.servlet.http.*;

import com.hp.hpl.jena.query.*;
import com.hp.hpl.jena.rdf.model.*;
import com.hp.hpl.jena.util.FileManager;

/**
 * Servlet that runs a fixed SPARQL SELECT query against a local N-Triples
 * dump of the Linked Movie Database and renders the resulting actor names
 * as a simple HTML page.
 *
 * <p>NOTE(review): {@link #runQuery} loads the complete dump file into a
 * Jena {@link Model} on every request. For a large dump this is slow and can
 * exhaust the JVM heap; a persistent store (for example Jena TDB) is the
 * usual way to avoid that, but it needs a dependency this file does not
 * import, so the loading strategy is left unchanged here.
 */
public class ServletExample
    extends HttpServlet
{
    /***********************************/
    /* Constants                       */
    /***********************************/

    private static final long serialVersionUID = 1L;

    /** Remote endpoint; only referenced by the disabled remote-query variant in {@link #runQuery}. */
    public static final String SPARQL_ENDPOINT = "http://data.linkedmdb.org/sparql";

    /** The query that is executed. The director names are hard-coded in the query text. */
    public static final String QUERY ="PREFIX m: <http://data.linkedmdb.org/resource/movie/>"
            +"SELECT DISTINCT ?actorName WHERE {"+
            "?dir1 m:director_name \"Sofia Coppola\"."+
            "?dir2 m:director_name \"Francis Ford Coppola\"."+
            "?dir1film m:director ?dir1;"+
            "m:actor ?actor."+
            "?dir2film m:director ?dir2;"+
            "m:actor ?actor."+
            "?actor m:actor_name ?actorName."+
            "}";
    /* Parameterised variant of the query (currently disabled):
    "PREFIX m: <http://data.linkedmdb.org/resource/movie/>\n" +
    "SELECT DISTINCT ?actorName WHERE {\n" +
    " ?dir1 m:director_name %dir_name_1%.\n" +
    " ?dir2 m:director_name %dir_name_2%.\n" +
    " ?dir1film m:director ?dir1;\n" +
    " m:actor ?actor.\n" +
    " ?dir2film m:director ?dir2;\n" +
    " m:actor ?actor.\n" +
    " ?actor m:actor_name ?actorName.\n" +
    "}\n" +
    "";*/

    /** Location of the local N-Triples dump that is queried. */
    private static final String DUMP_FILE = "e:\\applications\\linkedmdb-18-05-2009-dump\\dump.nt";

    private static final String HEADER = "<html>\n" +
            " <head>\n" +
            " <title>results</title>\n" +
            " <link href=\"simple.css\" type=\"text/css\" rel=\"stylesheet\" />\n" +
            " </head>\n" +
            " <body>\n" +
            "";

    private static final String FOOTER = "</body></html>";

    /**
     * Respond to HTTP GET request. Will need to be mounted against some URL
     * pattern in web.xml.
     *
     * <p>Renders a hint page when either {@code dir1} or {@code dir2} is
     * missing or empty, otherwise runs the query.
     *
     * @param req  the request; the parameters {@code dir1} and {@code dir2} are read
     * @param resp the response the HTML page is written to
     * @throws ServletException declared by the overridden method
     * @throws IOException if writing the response fails
     */
    @Override
    protected void doGet( HttpServletRequest req, HttpServletResponse resp )
        throws ServletException, IOException
    {
        // Must happen before the first getWriter() call, otherwise the
        // charset is ignored and non-ASCII names are written in the
        // container's default encoding.
        resp.setContentType( "text/html; charset=UTF-8" );

        String dir1 = req.getParameter( "dir1" );
        String dir2 = req.getParameter( "dir2" );
        if (dir1 == null || dir2 == null || dir1.isEmpty() || dir2.isEmpty()) {
            noInput( resp );
        }
        else {
            runQuery( resp, dir1, dir2 );
        }
    }

    /**
     * Writes a page asking the caller to supply both director parameters.
     *
     * @param resp the response to write to
     * @throws IOException if writing the response fails
     */
    protected void noInput( HttpServletResponse resp )
        throws IOException
    {
        header( resp );
        resp.getWriter().println( "<p>Please select director names as query params <code>dir1</code> and <code>dir2</code></p>" );
        footer( resp );
    }

    /**
     * Writes the closing HTML of the page.
     *
     * @param resp the response to write to
     * @throws IOException if writing the response fails
     */
    protected void footer( HttpServletResponse resp ) throws IOException {
        resp.getWriter().println( FOOTER );
    }

    /**
     * Writes the opening HTML of the page.
     *
     * @param resp the response to write to
     * @throws IOException if writing the response fails
     */
    protected void header( HttpServletResponse resp ) throws IOException {
        resp.getWriter().println( HEADER );
    }

    /**
     * Loads the local dump, executes {@link #QUERY} against it and writes one
     * {@code <div>} per result row.
     *
     * @param resp the response to write to
     * @param dir1 first director name; currently unused because the query
     *             text is fixed (see the disabled template next to {@link #QUERY})
     * @param dir2 second director name; currently unused for the same reason
     * @throws IOException if writing the response fails
     */
    protected void runQuery( HttpServletResponse resp, String dir1, String dir2 )
        throws IOException
    {
        PrintWriter out = resp.getWriter();

        // Set up the query. Substituting dir1/dir2 into the template is
        // disabled; the fixed query text is used as-is.
        Query query = QueryFactory.create( QUERY );

        // Reads the whole dump file into a Model on every request.
        Model model = FileManager.get().loadModel( DUMP_FILE );

        // Remote alternative (disabled):
        // QueryExecutionFactory.sparqlService( SPARQL_ENDPOINT, query );
        QueryExecution qexec = QueryExecutionFactory.create( query, model );
        try {
            // perform the query
            ResultSet results = qexec.execSelect();

            // generate the output
            header( resp );
            if (!results.hasNext()) {
                out.println( "<p>No results, sorry.</p>" );
            }
            else {
                out.println( "<h1>Results</h1>" );
                while (results.hasNext()) {
                    QuerySolution qs = results.next();
                    String actorName = qs.getLiteral( "actorName" ).getLexicalForm();
                    // Values come from the data set, not from this code, so
                    // they are escaped before being embedded in markup.
                    out.println( String.format( "<div>Actor named: %s</div>", escapeHtml( actorName ) ) );
                }
            }
            footer( resp );
        }
        finally {
            // Release the resources held by the query execution even when
            // evaluating the query or writing the page fails.
            qexec.close();
        }
    }

    /**
     * Escapes the characters that are significant in HTML text and attribute
     * values.
     *
     * @param text the raw text, may be {@code null}
     * @return the escaped text, or an empty string for {@code null}
     */
    private static String escapeHtml( String text ) {
        if (text == null) {
            return "";
        }
        StringBuilder escaped = new StringBuilder( text.length() + 16 );
        for (int i = 0; i < text.length(); i++) {
            char c = text.charAt( i );
            switch (c) {
                case '&':  escaped.append( "&amp;" );  break;
                case '<':  escaped.append( "&lt;" );   break;
                case '>':  escaped.append( "&gt;" );   break;
                case '"':  escaped.append( "&quot;" ); break;
                case '\'': escaped.append( "&#39;" );  break;
                default:   escaped.append( c );
            }
        }
        return escaped.toString();
    }
}

有什么办法可以解决这个异常吗?

最佳答案

您似乎正在使用 Jena/RIOT 将所有数据加载到内存中。据我所知,LinkedMDB 的数据量相当大,这种做法会带来问题:您实际上是把整个数据库都读入了内存。

增大 JVM 堆是一种可行的解决方案,但如果数据持续增长,这种做法无法扩展。

正确的解决方案是采用专为这种大小的数据集设计的其他 Jena 配置。这些是:

  1. Jena SDB ,它使用关系数据库作为后端。
  2. Jena TDB ,它使用基于 B 树索引的 native Java 存储来加速查询。它的扩展性比 (1) 更好。

您也可以选择可扩展的 RDF 数据库,例如 4store,并通过 Jena ARQ 查询您的数据。这是目前可扩展性和性能都最好的解决方案。

关于java - 无法查询链接电影数据库的本地版本,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/5605436/

24 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com