gpt4 book ai didi

java - Java中最好的大集合数据结构

转载 作者:搜寻专家 更新时间:2023-10-31 19:32:15 25 4
gpt4 key购买 nike

我需要在一个大的 Integer Set 中找到间隙,其中填充了一个通过文件的读取循环,我想知道是否存在为此目的已经完成的事情以避免具有堆溢出风险的简单 Set 对象。

为了更好地解释我的问题,我必须告诉您我的票务 Java 软件是如何工作的。每张票都有一个全局累进号码,与其他信息一起存储在每日日志文件中。我必须编写一个检查程序来验证每日日志文件中是否存在数字差距。

第一个想法是创建一个包含所有日志文件的读取循环,读取每一行,获取票号并将其存储在 Integer TreeSet 对象中,然后在该 Set 中查找间隙。问题是票号可能非常高,可能会使内存堆空间饱和,如果我必须切换到 Long 对象,我也想要一个好的解决方案。Set 解决方案浪费了大量内存,因为如果我发现前 100 个数字没有间隙,则将它们存储在 Set 中没有意义。

我该如何解决?我可以为此目的使用一些已经完成的数据结构吗?

最佳答案

我假设 (A) 您正在寻找的间隙是异常(exception)而不是规则,并且 (B) 您正在处理的日志文件主要按票号排序(尽管一些乱序的条目是好的)。

如果是这样,那么我会考虑为此推出您自己的数据结构。这是我的意思的一个简单示例(很多留给读者)。

基本上它所做的是实现 Set 但实际上将其存储为 Map,每个条目代表集合中的一系列连续值。

add 方法被覆盖以适本地维护支持 Map。例如,如果您将 5 添加到集合中并且已经有一个包含 4 的范围,那么它只是扩展该范围而不是添加新条目。

请注意,“大部分排序”假设的原因是,对于完全未排序的数据,此方法仍将使用大量内存:支持映射将变大(因为未排序的条目被添加到各处)越来越小(因为更多的条目填补了空白,允许合并连续的条目)。

代码如下:

package com.matt.tester;

import java.util.Collection;
import java.util.Comparator;
import java.util.Iterator;
import java.util.Map;
import java.util.SortedSet;
import java.util.TreeMap;



public class SE {


public class RangeSet<T extends Long> implements SortedSet<T> {

private final TreeMap<T, T> backingMap = new TreeMap<T,T>();

@Override
public int size() {
// TODO Auto-generated method stub
return 0;
}

@Override
public boolean isEmpty() {
// TODO Auto-generated method stub
return false;
}

@Override
public boolean contains(Object o) {
if ( ! ( o instanceof Number ) ) {
throw new IllegalArgumentException();
}
T n = (T) o;
// Find the greatest backingSet entry less than n
Map.Entry<T,T> floorEntry = backingMap.floorEntry(n);
if ( floorEntry == null ) {
return false;
}
final Long endOfRange = floorEntry.getValue();
if ( endOfRange >= n) {
return true;
}
return false;
}

@Override
public Iterator<T> iterator() {
throw new IllegalAccessError("Method not implemented. Left for the reader. (You'd need a custom Iterator class, I think)");
}

@Override
public Object[] toArray() {
throw new IllegalAccessError("Method not implemented. Left for the reader.");
}

@Override
public <T> T[] toArray(T[] a) {
throw new IllegalAccessError("Method not implemented. Left for the reader.");
}

@Override
public boolean add(T e) {
if ( (Long) e < 1L ) {
throw new IllegalArgumentException("This example only supports counting numbers, mainly because it simplifies printGaps() later on");
}
if ( this.contains(e) ) {
// Do nothing. Already in set.
}
final Long previousEntryKey;
final T eMinusOne = (T) (Long) (e-1L);
final T nextEntryKey = (T) (Long) (e+1L);
if ( this.contains(eMinusOne ) ) {
// Find the greatest backingSet entry less than e
Map.Entry<T,T> floorEntry = backingMap.floorEntry(e);
final T startOfPrecedingRange;
startOfPrecedingRange = floorEntry.getKey();
if ( this.contains(nextEntryKey) ) {
// This addition will join two previously separated ranges
T endOfRange = backingMap.get(nextEntryKey);
backingMap.remove(nextEntryKey);
// Extend the prior entry to include the whole range
backingMap.put(startOfPrecedingRange, endOfRange);
return true;
} else {
// This addition will extend the range immediately preceding
backingMap.put(startOfPrecedingRange, e);
return true;
}
} else if ( this.backingMap.containsKey(nextEntryKey) ) {
// This addition will extend the range immediately following
T endOfRange = backingMap.get(nextEntryKey);
backingMap.remove(nextEntryKey);
// Extend the prior entry to include the whole range
backingMap.put(e, endOfRange);
return true;
} else {
// This addition is a new range, it doesn't touch any others
backingMap.put(e,e);
return true;
}
}

@Override
public boolean remove(Object o) {
throw new IllegalAccessError("Method not implemented. Left for the reader.");
}

@Override
public boolean containsAll(Collection<?> c) {
throw new IllegalAccessError("Method not implemented. Left for the reader.");
}

@Override
public boolean addAll(Collection<? extends T> c) {
throw new IllegalAccessError("Method not implemented. Left for the reader.");
}

@Override
public boolean retainAll(Collection<?> c) {
throw new IllegalAccessError("Method not implemented. Left for the reader.");
}

@Override
public boolean removeAll(Collection<?> c) {
throw new IllegalAccessError("Method not implemented. Left for the reader.");
}

@Override
public void clear() {
this.backingMap.clear();
}

@Override
public Comparator<? super T> comparator() {
throw new IllegalAccessError("Method not implemented. Left for the reader.");
}

@Override
public SortedSet<T> subSet(T fromElement, T toElement) {
throw new IllegalAccessError("Method not implemented. Left for the reader.");
}

@Override
public SortedSet<T> headSet(T toElement) {
throw new IllegalAccessError("Method not implemented. Left for the reader.");
}

@Override
public SortedSet<T> tailSet(T fromElement) {
throw new IllegalAccessError("Method not implemented. Left for the reader.");
}

@Override
public T first() {
throw new IllegalAccessError("Method not implemented. Left for the reader.");
}

@Override
public T last() {
throw new IllegalAccessError("Method not implemented. Left for the reader.");
}

public void printGaps() {
Long lastContiguousNumber = 0L;
for ( Map.Entry<T, T> entry : backingMap.entrySet() ) {
Long startOfNextRange = (Long) entry.getKey();
Long endOfNextRange = (Long) entry.getValue();
if ( startOfNextRange > lastContiguousNumber + 1 ) {
System.out.println( String.valueOf(lastContiguousNumber+1) + ".." + String.valueOf(startOfNextRange - 1) );
}
lastContiguousNumber = endOfNextRange;
}
System.out.println( String.valueOf(lastContiguousNumber+1) + "..infinity");
System.out.println("Backing map size is " + this.backingMap.size());
System.out.println(backingMap.toString());
}




}


public static void main(String[] args) {

SE se = new SE();

RangeSet<Long> testRangeSet = se.new RangeSet<Long>();

// Start by putting 1,000,000 entries into the map with a few, pre-determined, hardcoded gaps
for ( long i = 1; i <= 1000000; i++ ) {
// Our pre-defined gaps...
if ( i == 58349 || ( i >= 87333 && i <= 87777 ) || i == 303998 ) {
// Do not put these numbers in the set
} else {
testRangeSet.add(i);
}
}

testRangeSet.printGaps();

}
}

输出是:

58349..58349
87333..87777
303998..303998
1000001..infinity
Backing map size is 4
{1=58348, 58350=87332, 87778=303997, 303999=1000000}

关于java - Java中最好的大集合数据结构,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/38897301/

25 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com