gpt4 book ai didi

java - 在 Java 集合中并行搜索

转载 作者:搜寻专家 更新时间:2023-10-31 20:13:29 25 4
gpt4 key购买 nike

我有一个名为 lines 的 List<String>,以及一个非常大的(约 3G)名为 voc 的 Set<String>。我需要找出 lines 中所有同时存在于 voc 中的行。能否用多线程的方式来完成?

目前我有这个简单的代码:

// Sequential baseline from the question: test each entry of lines for membership in voc, one at a time.
for(String line: lines) {
if (voc.contains(line)) {
// Great!! (placeholder: this line is present in voc)
}
}

有没有办法同时搜索几行?可能有现成的解决方案吗?

PS:我用的是javolution.util.FastMap ,因为它在填充期间表现更好。

最佳答案

下面是一种可能的实现。请注意,其中省略了错误/中断处理,但它可以作为一个起点。我附带了一个 main 方法,因此您可以直接将其复制粘贴到 IDE 中快速运行演示。

编辑:做了一些清理,以提高可读性并改进列表分区逻辑。

import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.concurrent.Callable;
import java.util.concurrent.CompletionService;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorCompletionService;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;

/**
 * Demonstrates checking a list of search values against a target set in parallel.
 *
 * <p>The search list is split into contiguous chunks, each chunk is scanned by its own task on a
 * fixed-size thread pool, and the per-chunk matches are merged into one result set.
 */
public class ParallelizeListSearch {

    /**
     * Small demo: searches a seven-element list against a set built from the same elements and
     * prints every match.
     *
     * @param args unused
     * @throws InterruptedException if the calling thread is interrupted while waiting for results
     * @throws ExecutionException if a search task fails
     */
    public static void main(String[] args) throws InterruptedException, ExecutionException {
        List<String> searchList = new ArrayList<String>(7);
        searchList.add("hello");
        searchList.add("world");
        searchList.add("java");
        searchList.add("debian");
        searchList.add("linux");
        searchList.add("jsr-166");
        searchList.add("stack");

        Set<String> targetSet = new HashSet<String>(searchList);

        Set<String> matchSet = findMatches(searchList, targetSet);
        System.out.println("Found " + matchSet.size() + " matches");
        for (String match : matchSet) {
            System.out.println("match: " + match);
        }
    }

    /**
     * Returns every element of {@code searchList} that is contained in {@code targetSet}.
     *
     * <p>The list is partitioned into at most {@code availableProcessors()} non-empty chunks. When
     * that yields zero or one chunk the search runs on the calling thread; otherwise one task per
     * chunk is submitted to a thread pool. The same {@code targetSet} instance is handed to every
     * task, so it must tolerate concurrent {@code contains} calls and must not be modified while
     * this method runs.
     *
     * @param searchList values to look up
     * @param targetSet set to test membership against
     * @return a new set holding the distinct matching values (empty if none match)
     * @throws InterruptedException if the calling thread is interrupted while waiting for a task
     * @throws ExecutionException if a search task throws
     */
    public static Set<String> findMatches(List<String> searchList, Set<String> targetSet)
            throws InterruptedException, ExecutionException {
        int threadCount = Runtime.getRuntime().availableProcessors();
        List<List<String>> partitionList = getChunkList(searchList, threadCount);

        if (partitionList.size() <= 1) {
            // Zero or one chunk of work: a thread pool would be pure overhead.
            return new ListSearcher(searchList, targetSet).call();
        }

        Set<String> locatedMatchSet = new HashSet<String>();
        // Never start more workers than there are chunks to process.
        ExecutorService executor = Executors.newFixedThreadPool(partitionList.size());
        try {
            CompletionService<Set<String>> completionService =
                    new ExecutorCompletionService<Set<String>>(executor);

            for (List<String> chunkList : partitionList) {
                completionService.submit(new ListSearcher(chunkList, targetSet));
            }

            // Exactly one result is produced per submitted chunk; merge them as they complete.
            for (int x = 0; x < partitionList.size(); x++) {
                Set<String> threadMatchSet = completionService.take().get();
                locatedMatchSet.addAll(threadMatchSet);
            }
        } finally {
            // Always release the pool threads, including when take()/get() throws. On the
            // success path every task has already finished, so this cancels nothing.
            executor.shutdownNow();
        }

        return locatedMatchSet;
    }

    /** Task that collects the elements of one list chunk that are present in the target set. */
    private static class ListSearcher implements Callable<Set<String>> {

        private final List<String> searchList;
        private final Set<String> targetSet;

        public ListSearcher(List<String> searchList, Set<String> targetSet) {
            this.searchList = searchList;
            this.targetSet = targetSet;
        }

        /**
         * Scans the chunk and returns its matches.
         *
         * @return a fresh set per invocation, so repeated calls never share or accumulate state
         */
        @Override
        public Set<String> call() {
            Set<String> matchSet = new HashSet<String>();
            for (String searchValue : searchList) {
                if (targetSet.contains(searchValue)) {
                    matchSet.add(searchValue);
                }
            }
            return matchSet;
        }
    }

    /**
     * Splits a list into at most {@code splitCount} contiguous, non-empty {@code subList} views of
     * near-equal size.
     *
     * <p>Fewer than {@code splitCount} chunks are returned when the list has fewer elements than
     * that; an empty list (or a non-positive {@code splitCount}) yields no chunks at all.
     *
     * @param unpartitionedList list to split; the chunks are views backed by it, not copies
     * @param splitCount maximum number of chunks
     * @return the chunks in original order
     */
    private static <T> List<List<T>> getChunkList(List<T> unpartitionedList, int splitCount) {
        int totalProblemSize = unpartitionedList.size();
        List<List<T>> chunkList = new ArrayList<List<T>>(Math.max(splitCount, 0));
        if (totalProblemSize == 0 || splitCount <= 0) {
            return chunkList;
        }

        // Round up so that splitCount chunks are always enough to cover the whole list.
        int chunkSize = (int) Math.ceil((double) totalProblemSize / splitCount);

        int offset = 0;
        while (offset < totalProblemSize) {
            // Clamp via the remaining length so offset + chunkSize cannot overflow.
            int limit = offset + Math.min(chunkSize, totalProblemSize - offset);
            chunkList.add(unpartitionedList.subList(offset, limit));
            offset = limit;
        }

        return chunkList;
    }

}

关于java - 在 Java 集合中并行搜索,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/14552297/

25 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com