gpt4 book ai didi

java - 将大量小文件读入内存的最快方法是什么?

转载 作者:IT老高 更新时间:2023-10-28 20:51:24 26 4
gpt4 key购买 nike

我需要在每个服务器启动时读取大约 50 个文件,并将每个文本文件的表示形式放入内存中。每个文本文件都有自己的字符串(字符串持有者最好使用哪种类型?)。

将文件读入内存的最快方法是什么,以及保存文本以便我可以在内存中操作它(主要是搜索和替换)的最佳数据结构/类型是什么?

谢谢

最佳答案

内存映射文件将是最快的......像这样:

    // Map the whole file into memory read-only; subsequent reads go through
    // the buffer rather than through the stream.
    final File             file;
    final FileInputStream  fin;      // was assigned below but never declared
    final FileChannel      channel;
    final MappedByteBuffer buffer;

    file    = new File(fileName);
    fin     = new FileInputStream(file);
    channel = fin.getChannel();
    // A single mapping may cover at most Integer.MAX_VALUE bytes.
    buffer  = channel.map(MapMode.READ_ONLY, 0, file.length());

然后继续从字节缓冲区中读取。

这将比 FileInputStream 或 FileReader 快得多。

编辑:

经过一番调查,事实证明,根据您的操作系统,您最好改用 new BufferedInputStream(new FileInputStream(file))。然而,一次性将全部内容读入一个与文件大小相同的 char[],听起来是最糟糕的方式。

所以 BufferedInputStream 应该在所有平台上提供大致一致的性能,而内存映射文件可能会慢或快,具体取决于底层操作系统。与所有对性能至关重要的东西一样,您应该测试您的代码,看看什么效果最好。

编辑:

好的,这里有一些测试(第一个测试做了两次以将文件放入磁盘缓存)。

我在 rt.jar 类文件上运行它,解压到硬盘上,这是在 Windows 7 beta x64 下。即 16784 个文件,总共 94,706,637 个字节。

首先是结果...

(请记住,第一个测试运行了两次,目的是先把文件载入磁盘缓存)

  • ArrayTest

    • 时间 = 83016
    • 字节 = 118641472
  • ArrayTest

    • 时间 = 46570
    • 字节 = 118641472
  • DataInputByteAtATime

    • 时间 = 74735
    • 字节 = 118641472
  • DataInputReadFully

    • 时间 = 8953
    • 字节 = 118641472
  • MemoryMapped

    • 时间 = 2320
    • 字节 = 118641472

这里是代码...

import java.io.BufferedInputStream;
import java.io.DataInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.MappedByteBuffer;
import java.nio.channels.FileChannel;
import java.nio.channels.FileChannel.MapMode;
import java.util.HashSet;
import java.util.Set;

/**
 * Launcher that runs the benchmarks one after another, handing each of them
 * the same command-line arguments.
 */
public class Main
{
    /**
     * Invokes each benchmark's own {@code main} in a fixed order.
     *
     * @param argv command-line arguments, forwarded unchanged to every benchmark
     */
    public static void main(final String[] argv)
    {
        // ArrayTest is deliberately invoked twice in a row.
        ArrayTest.main(argv);
        ArrayTest.main(argv);

        DataInputByteAtATime.main(argv);
        DataInputReadFully.main(argv);
        MemoryMapped.main(argv);
    }
}

/**
 * Base class for the file-reading benchmarks. It gathers every regular file
 * below a root directory, times how long the subclass needs to read all of
 * them, and prints the outcome.
 */
abstract class Test
{
    /**
     * Runs the benchmark and prints three lines: the concrete class name, the
     * elapsed wall-clock time in milliseconds, and the total of all
     * {@link #readFile(File)} results.
     *
     * @param root directory to scan recursively; a path that cannot be listed
     *             (missing, unreadable, or not a directory) contributes no files
     */
    public final void run(final File root)
    {
        final Set<File> files;
        final long size;
        final long start;
        final long end;
        final long total;

        files = new HashSet<File>();
        getFiles(root, files);

        // Only the reads are timed; the directory walk above is excluded.
        start = System.currentTimeMillis();

        size = readFiles(files);

        end = System.currentTimeMillis();
        total = end - start;

        System.out.println(getClass().getName());
        System.out.println("time = " + total);
        System.out.println("bytes = " + size);
    }

    /**
     * Recursively adds every regular file below {@code dir} to {@code files}.
     *
     * @param dir   directory to list
     * @param files accumulator that receives the files found
     */
    private void getFiles(final File dir,
                          final Set<File> files)
    {
        final File[] children;

        children = dir.listFiles();

        if(children == null)
        {
            // listFiles() returns null when dir is not a directory or an I/O
            // error occurs; skip it instead of failing with a
            // NullPointerException in the loop below.
            return;
        }

        for(final File child : children)
        {
            if(child.isFile())
            {
                files.add(child);
            }
            else
            {
                getFiles(child, files);
            }
        }
    }

    /**
     * Reads every file in the set and totals the per-file results.
     *
     * @param files files to read
     * @return the sum of {@link #readFile(File)} over all files
     */
    private long readFiles(final Set<File> files)
    {
        long size;

        size = 0;

        for(final File file : files)
        {
            size += readFile(file);
        }

        return (size);
    }

    /**
     * Reads one file using the strategy under test.
     *
     * @param file file to read
     * @return a value derived from the file's contents; {@link #run(File)}
     *         adds these up and prints the total
     */
    protected abstract long readFile(File file);
}

/**
 * Benchmark that loads each file into one byte array sized to the file,
 * using repeated bulk reads from a {@link BufferedInputStream}.
 */
class ArrayTest
    extends Test
{
    /**
     * Runs this benchmark on the directory named by the first argument.
     *
     * @param argv command-line arguments; {@code argv[0]} is the root directory
     */
    public static void main(final String[] argv)
    {
        final Test test;

        test = new ArrayTest();
        test.run(new File(argv[0]));
    }

    /**
     * Reads the whole file into memory and sums its bytes.
     *
     * @param file file to read
     * @return the sum of the file's bytes taken as signed values, or 0 if the
     *         file could not be read completely (the failure is printed)
     */
    protected long readFile(final File file)
    {
        InputStream stream;

        stream = null;

        try
        {
            final long length;
            final byte[] data;
            int soFar;
            long sum;

            length = file.length();

            if(length > Integer.MAX_VALUE)
            {
                // The cast below would otherwise produce a truncated or negative size.
                throw new IOException("file too large for a single array: " + file);
            }

            stream = new BufferedInputStream(new FileInputStream(file));
            data = new byte[(int)length];
            soFar = 0;

            while(soFar < data.length)
            {
                final int count;

                count = stream.read(data, soFar, data.length - soFar);

                if(count < 0)
                {
                    // read() reports end of stream as -1; adding that to soFar
                    // would corrupt the offset, so treat a short file as an error.
                    throw new IOException("unexpected end of file after " + soFar + " bytes: " + file);
                }

                soFar += count;
            }

            // long accumulator: the method returns long and an int could overflow.
            sum = 0;

            for(final byte b : data)
            {
                sum += b;
            }

            return (sum);
        }
        catch(final IOException ex)
        {
            ex.printStackTrace();
        }
        finally
        {
            if(stream != null)
            {
                try
                {
                    stream.close();
                }
                catch(final IOException ex)
                {
                    ex.printStackTrace();
                }
            }
        }

        return (0);
    }
}

/**
 * Benchmark that pulls each file through a buffered {@link DataInputStream}
 * one byte per call.
 */
class DataInputByteAtATime
    extends Test
{
    /**
     * Runs this benchmark on the directory named by the first argument.
     *
     * @param argv command-line arguments; {@code argv[0]} is the root directory
     */
    public static void main(final String[] argv)
    {
        new DataInputByteAtATime().run(new File(argv[0]));
    }

    /**
     * Sums the bytes of the file, fetching them individually.
     *
     * @param file file to read
     * @return the sum of the bytes taken as signed values, or 0 when an
     *         {@link IOException} occurred (its stack trace is printed)
     */
    protected long readFile(final File file)
    {
        DataInputStream in = null;

        try
        {
            in = new DataInputStream(new BufferedInputStream(new FileInputStream(file)));

            int total = 0;

            // Count down from the file's length instead of counting up to it.
            for(int left = (int)file.length(); left > 0; left--)
            {
                total += in.readByte();
            }

            return total;
        }
        catch(final IOException failure)
        {
            failure.printStackTrace();
            return 0;
        }
        finally
        {
            closeAndReport(in);
        }
    }

    /** Closes the stream if one was opened, printing any failure to close. */
    private static void closeAndReport(final DataInputStream in)
    {
        if(in == null)
        {
            return;
        }

        try
        {
            in.close();
        }
        catch(final IOException failure)
        {
            failure.printStackTrace();
        }
    }
}

/**
 * Benchmark that loads each file with a single
 * {@link DataInputStream#readFully(byte[])} call.
 */
class DataInputReadFully
    extends Test
{
    /**
     * Runs this benchmark on the directory named by the first argument.
     *
     * @param argv command-line arguments; {@code argv[0]} is the root directory
     */
    public static void main(final String[] argv)
    {
        new DataInputReadFully().run(new File(argv[0]));
    }

    /**
     * Reads the entire file into an array sized to the file and sums it.
     *
     * @param file file to read
     * @return the sum of the bytes taken as signed values, or 0 when an
     *         {@link IOException} occurred (its stack trace is printed)
     */
    protected long readFile(final File file)
    {
        DataInputStream in = null;

        try
        {
            in = new DataInputStream(new BufferedInputStream(new FileInputStream(file)));

            final byte[] contents = new byte[(int)file.length()];
            in.readFully(contents);

            int total = 0;

            for(int i = 0; i < contents.length; i++)
            {
                total += contents[i];
            }

            return total;
        }
        catch(final IOException failure)
        {
            failure.printStackTrace();
            return 0;
        }
        finally
        {
            closeAndReport(in);
        }
    }

    /** Closes the stream if one was opened, printing any failure to close. */
    private static void closeAndReport(final DataInputStream in)
    {
        if(in == null)
        {
            return;
        }

        try
        {
            in.close();
        }
        catch(final IOException failure)
        {
            failure.printStackTrace();
        }
    }
}

/**
 * Benchmark that reads each file through a buffered {@link DataInputStream}
 * in chunks of up to 512 bytes.
 */
class DataInputReadInChunks
    extends Test
{
    /**
     * Runs this benchmark on the directory named by the first argument.
     *
     * @param argv command-line arguments; {@code argv[0]} is the root directory
     */
    public static void main(final String[] argv)
    {
        final Test test;

        test = new DataInputReadInChunks();
        test.run(new File(argv[0]));
    }

    /**
     * Sums the bytes of the file, reading it chunk by chunk until end of stream.
     *
     * @param file file to read
     * @return the sum of the bytes taken as signed values, or 0 when an
     *         {@link IOException} occurred (its stack trace is printed)
     */
    protected long readFile(final File file)
    {
        DataInputStream stream;

        stream = null;

        try
        {
            final byte[] data;
            int count;
            long sum;

            stream = new DataInputStream(new BufferedInputStream(new FileInputStream(file)));
            data = new byte[512];
            sum = 0;

            // read() returns the number of bytes placed in data, or -1 at end
            // of stream. Stopping on -1 also terminates for empty files.
            while((count = stream.read(data)) != -1)
            {
                // Only the first count bytes belong to this chunk, and the
                // sum carries over from one chunk to the next.
                for(int i = 0; i < count; i++)
                {
                    sum += data[i];
                }
            }

            return (sum);
        }
        catch(final IOException ex)
        {
            ex.printStackTrace();
        }
        finally
        {
            if(stream != null)
            {
                try
                {
                    stream.close();
                }
                catch(final IOException ex)
                {
                    ex.printStackTrace();
                }
            }
        }

        return (0);
    }
}
/**
 * Benchmark that maps each file into memory read-only and reads it through
 * the resulting {@link MappedByteBuffer}.
 */
class MemoryMapped
    extends Test
{
    /**
     * Runs this benchmark on the directory named by the first argument.
     *
     * @param argv command-line arguments; {@code argv[0]} is the root directory
     */
    public static void main(final String[] argv)
    {
        new MemoryMapped().run(new File(argv[0]));
    }

    /**
     * Maps the file and sums its bytes with relative {@code get()} calls.
     *
     * @param file file to read
     * @return the sum of the bytes taken as signed values, or 0 when an
     *         {@link IOException} occurred (its stack trace is printed)
     */
    protected long readFile(final File file)
    {
        FileInputStream in = null;

        try
        {
            in = new FileInputStream(file);

            final MappedByteBuffer mapped =
                in.getChannel().map(MapMode.READ_ONLY, 0, file.length());
            int total = 0;

            // Count down from the file's length instead of counting up to it.
            for(int left = (int)file.length(); left > 0; left--)
            {
                total += mapped.get();
            }

            return total;
        }
        catch(final IOException failure)
        {
            failure.printStackTrace();
            return 0;
        }
        finally
        {
            closeAndReport(in);
        }
    }

    /** Closes the stream if one was opened, printing any failure to close. */
    private static void closeAndReport(final FileInputStream in)
    {
        if(in == null)
        {
            return;
        }

        try
        {
            in.close();
        }
        catch(final IOException failure)
        {
            failure.printStackTrace();
        }
    }
}

关于java - 将大量小文件读入内存的最快方法是什么?,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/625420/

26 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com