作者热门文章
- iOS/Objective-C 元类和类别
- objective-c - -1001 错误,当 NSURLSession 通过 httpproxy 和/etc/hosts
- java - 使用网络类获取 url 地址
- ios - 推送通知中不播放声音
我正在为外部归并排序编写代码。其思路是:输入文件包含的数字太多,无法全部存放在数组中,因此每次只读取其中一部分,并将其写入文件保存。这是我的代码。虽然它运行得很快,但还不够快。我想知道您能否想到我可以对代码做哪些改进。请注意,一开始我会先把每 1m(一百万)个整数放在一起排序,这样就跳过了归并算法的前几轮迭代。
import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.security.DigestInputStream;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.Arrays;
/**
 * External merge sort for a binary file of 4-byte integers (the format read by
 * {@link DataInputStream#readInt()} and written by {@link DataOutputStream#writeInt(int)}).
 *
 * <p>The input file is first split into sorted runs of up to one million integers, which are
 * written to a scratch file. Adjacent runs are then merged pairwise, ping-ponging between the
 * two files and doubling the run length each pass, until a single sorted run remains. The final
 * result always ends up in the first file.
 *
 * <p>File access goes through {@code MyFileInputStream} / {@code MyFileOutputStream}, which are
 * declared outside this class; they are assumed to be file streams constructed from a
 * {@link java.io.FileDescriptor} (TODO confirm against their definitions).
 */
public class ExternalSort {

    /** Number of bytes occupied by one stored integer. */
    private static final int BYTES_PER_INT = 4;

    /** Maximum number of integers sorted in memory to form one initial run. */
    private static final int INITIAL_RUN_LENGTH = 1000000;

    /**
     * Sorts the integers stored in {@code f1} in ascending order, using {@code f2} as scratch space.
     *
     * @param f1 path of the file to sort; it receives the sorted result
     * @param f2 path of the scratch file; its previous contents are overwritten
     * @throws IllegalArgumentException if {@code f1} holds more than {@code Integer.MAX_VALUE} integers
     * @throws Exception if reading or writing either file fails
     */
    public static void sort(String f1, String f2) throws Exception {
        RandomAccessFile raf1 = new RandomAccessFile(f1, "rw");
        try {
            RandomAccessFile raf2 = new RandomAccessFile(f2, "rw");
            try {
                long intCount = raf1.length() / BYTES_PER_INT;
                if (intCount > Integer.MAX_VALUE) {
                    // merge() addresses elements with int indices, so larger files cannot be handled.
                    throw new IllegalArgumentException(
                            "File holds " + intCount + " integers; at most "
                                    + Integer.MAX_VALUE + " are supported");
                }
                if (intCount == 0) {
                    // Nothing to sort. Without this guard the run length would be 0 and the
                    // merge loop below could never make progress.
                    return;
                }
                int total = (int) intCount;

                // Drop stale data from a previous, longer scratch file so that merge(), which
                // derives the element count from the file length, never sees it.
                raf2.setLength((long) total * BYTES_PER_INT);

                // Run length is tracked as long so that doubling cannot overflow.
                long size = Math.min(INITIAL_RUN_LENGTH, total);
                externalSort(f1, f2, (int) size);

                // After externalSort the sorted runs live in f2, so the first merge pass writes to f1.
                boolean writeToOriginal = true;
                while (size < total) {
                    RandomAccessFile target = writeToOriginal ? raf1 : raf2;
                    String source = writeToOriginal ? f2 : f1;
                    target.seek(0);
                    DataOutputStream dos = new DataOutputStream(new BufferedOutputStream(
                            new MyFileOutputStream(target.getFD())));
                    for (long i = 0; i < total; i += 2 * size) {
                        // i < total and size < total, so both casts are lossless.
                        dos = merge(source, dos, (int) i, (int) size);
                    }
                    dos.flush();
                    writeToOriginal = !writeToOriginal;
                    size *= 2;
                }

                if (writeToOriginal) {
                    // The last pass (or the initial run creation) left the result in f2: copy it back.
                    raf1.seek(0);
                    raf2.seek(0);
                    DataInputStream in = new DataInputStream(new BufferedInputStream(
                            new MyFileInputStream(raf2.getFD())));
                    DataOutputStream out = new DataOutputStream(new BufferedOutputStream(
                            new MyFileOutputStream(raf1.getFD())));
                    for (int i = 0; i < total; i++) {
                        out.writeInt(in.readInt());
                    }
                    out.flush();
                }
            } finally {
                raf2.close();
            }
        } finally {
            raf1.close();
        }
    }

    /**
     * Creates the initial sorted runs: reads {@code f1} in chunks of up to {@code size} integers,
     * sorts each chunk in memory and writes it to {@code f2} starting at offset 0.
     *
     * <p>The last chunk may be shorter than {@code size} when the number of integers in
     * {@code f1} is not a multiple of {@code size}.
     *
     * @param f1 path of the file to read
     * @param f2 path of the file that receives the sorted runs
     * @param size maximum number of integers per run; must be positive unless {@code f1} is empty
     * @throws IllegalArgumentException if {@code size} is not positive and {@code f1} is not empty
     * @throws Exception if reading or writing either file fails
     */
    public static void externalSort(String f1, String f2, int size) throws Exception {
        RandomAccessFile raf1 = new RandomAccessFile(f1, "rw");
        try {
            RandomAccessFile raf2 = new RandomAccessFile(f2, "rw");
            try {
                long remaining = raf1.length() / BYTES_PER_INT;
                if (remaining == 0) {
                    return;
                }
                if (size <= 0) {
                    throw new IllegalArgumentException("size must be positive, got " + size);
                }

                int[] buffer = new int[(int) Math.min(size, remaining)];
                DataInputStream dis = new DataInputStream(new BufferedInputStream(
                        new MyFileInputStream(raf1.getFD())));
                DataOutputStream dos = new DataOutputStream(new BufferedOutputStream(
                        new MyFileOutputStream(raf2.getFD())));

                while (remaining > 0) {
                    // Never read past the end of the file: the final run may be partial.
                    int chunk = (int) Math.min(buffer.length, remaining);
                    for (int k = 0; k < chunk; k++) {
                        buffer[k] = dis.readInt();
                    }
                    Arrays.sort(buffer, 0, chunk);
                    for (int k = 0; k < chunk; k++) {
                        dos.writeInt(buffer[k]);
                    }
                    remaining -= chunk;
                }
                // Push buffered output to the file before its descriptor is closed below.
                dos.flush();
            } finally {
                raf2.close();
            }
        } finally {
            raf1.close();
        }
    }

    /**
     * Merges two adjacent sorted runs of {@code file} and appends the result to {@code dos}.
     *
     * <p>The left run covers element indices {@code [start, start + size)} and the right run
     * {@code [start + size, start + 2 * size)}; both are clipped to the end of the file, so the
     * right run may be short or absent. When both runs hold equal values, the left one is
     * written first. Nothing is written if the left run is empty.
     *
     * @param file path of the file holding the sorted runs
     * @param dos stream that receives the merged integers; it is not flushed or closed here
     * @param start index (in integers, not bytes) of the first element of the left run
     * @param size nominal length of each run, in integers
     * @return {@code dos}, to allow chaining
     * @throws IOException if the file cannot be read or {@code dos} cannot be written
     */
    public static DataOutputStream merge(String file,
            DataOutputStream dos, int start, int size) throws IOException {
        // Two independent handles give each run its own file position and read buffer.
        RandomAccessFile left = new RandomAccessFile(file, "r");
        try {
            RandomAccessFile right = new RandomAccessFile(file, "r");
            try {
                // All index arithmetic is done in long to stay correct for files over 2 GiB.
                long total = left.length() / BYTES_PER_INT;
                long leftEnd = Math.min((long) start + size, total);
                long rightEnd = Math.min(leftEnd + size, total);
                long leftRemaining = leftEnd - start;
                long rightRemaining = rightEnd - leftEnd;
                if (leftRemaining <= 0) {
                    return dos;
                }

                left.seek((long) BYTES_PER_INT * start);
                // Seek instead of InputStream.skip(): skip() may skip fewer bytes than requested.
                right.seek((long) BYTES_PER_INT * leftEnd);
                DataInputStream leftIn = new DataInputStream(new BufferedInputStream(
                        new MyFileInputStream(left.getFD())));
                DataInputStream rightIn = new DataInputStream(new BufferedInputStream(
                        new MyFileInputStream(right.getFD())));

                int a = leftIn.readInt();
                int b = rightRemaining > 0 ? rightIn.readInt() : 0;

                // Standard two-way merge; ties go to the left run.
                while (leftRemaining > 0 && rightRemaining > 0) {
                    if (a <= b) {
                        dos.writeInt(a);
                        if (--leftRemaining > 0) {
                            a = leftIn.readInt();
                        }
                    } else {
                        dos.writeInt(b);
                        if (--rightRemaining > 0) {
                            b = rightIn.readInt();
                        }
                    }
                }
                // Drain whichever run still has elements.
                while (leftRemaining > 0) {
                    dos.writeInt(a);
                    if (--leftRemaining > 0) {
                        a = leftIn.readInt();
                    }
                }
                while (rightRemaining > 0) {
                    dos.writeInt(b);
                    if (--rightRemaining > 0) {
                        b = rightIn.readInt();
                    }
                }
                return dos;
            } finally {
                right.close();
            }
        } finally {
            left.close();
        }
    }

    /**
     * Command-line entry point.
     *
     * @param args {@code args[0]} is the file to sort, {@code args[1]} the scratch file
     * @throws Exception if sorting fails
     */
    public static void main(String[] args) throws Exception {
        if (args.length < 2) {
            throw new IllegalArgumentException("usage: ExternalSort <file-to-sort> <scratch-file>");
        }
        String f1 = args[0];
        String f2 = args[1];
        sort(f1, f2);
    }
}
最佳答案
您可能希望一次合并 k>2 个片段。这将 I/O 量从 n log n/log 2 减少到 n log n/log k。
编辑:在伪代码中,这看起来像这样:
void sort(List list) {
if (list fits in memory) {
list.sort();
} else {
sublists = partition list into k about equally big sublists
for (sublist : sublists) {
sort(sublist);
}
merge(sublists);
}
}
void merge(List[] sortedsublists) {
keep a pointer in each sublist, which initially points to its first element
do {
find the pointer pointing at the smallest element
add the element it points to to the result list
advance that pointer
} until all pointers have reached the end of their sublist
return the result list
}
要有效地找到“最小”指针,您可以使用 PriorityQueue。
关于java - 如何加速 Java 中的外部归并排序,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/8402106/
本文实例汇总了Java各种排序算法。分享给大家供大家参考,具体如下: 1. 冒泡排序: ?
1.冒泡排序 它重复地走访过要排序的元素列,依次比较两个相邻的元素,如果顺序错误就把他们交换过来。走访元素的工作是重复地进行直到没有相邻元素需要交换,也就是说该元素列已经排序完成。 算法步
前言 平时用惯了高级语言高级工具高级算法,难免对一些基础算法感到生疏。但最基础的排序算法中实则蕴含着相当丰富的优化思维,熟练运用可起到举一反三之功效。 选择排序 选择排序几乎是
我是一名优秀的程序员,十分优秀!