gpt4 book ai didi

java - 响应式(Reactive)扩展和/或参与者模型如何简化流程编排?

转载 作者:行者123 更新时间:2023-12-01 19:20:36 26 4
gpt4 key购买 nike

我正在实现一个模块,其中我的主进程生成一组并行和顺序的子进程(任务)来完成其工作。任务本身主要是从各种来源获取数据并执行计算。有些是 CPU 密集型的,而另一些则是 IO 密集型的。

当前的实现使用 Java Executor/Completion 服务分多个步骤来实现此目的。此过程工作流程的示例如下所示:

               任务 A1 ---------------->
任务 0 ->      任务 A2 ---------------->
               任务 A3 -> 任务 B1 ->     任务 C(组合所有任务的结果以生成输出)
                          任务 B2 ->
               任务 A4 ---------------->

任务 A1-A4 并行运行,任务 B1 和 B2 也并行运行。最后,任务 C 依赖于所有 A 类和 B 类任务的结果来生成最终输出。

使用 Executor 服务来构建这个流程似乎不够清晰,我一直在寻找更好的方法,因为这些任务依赖关系可能会随着时间的推移而改变或变得更加复杂,而用 Future 和 Callable 来管理它们会随着时间的推移变得越来越难以维护。

我一直在探索这个主题,并了解到响应式扩展(Reactive Extensions)和参与者模型(Actor Model)框架。Akka 似乎有点过于庞大,而 RxJava 从高层次上看是一个合理的选择:它基于流/事件的处理模式可以简化设计,并使设计更具可扩展性。

RxJava Threading Examples 中的一些示例看起来也很有前途。

我来这里是为了向社区寻求一些建议,看看这是否是正确的方法,以及是否有其他方法/更好的框架来解决此类问题。

================================================== =================================================== ====

使用 JGraphT 编写了以下内容,但仍需要弄清楚如何重用线程池。在本例中,我最终为每个请求创建新的线程执行器。在这里发布代码的主要部分,这应该给出该方法的想法。

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;

import org.jgrapht.DirectedGraph;
import org.jgrapht.graph.DefaultEdge;
import org.jgrapht.traverse.TopologicalOrderIterator;

public class GraphTaskExecutor {

ThreadExecutor executor;
private List<Result> results;
private List<TaskInfo> log;
private DirectedGraph<GraphTask, DefaultEdge> graph;
Set<GraphTask> executing;

public GraphTaskExecutor() {
executor = new ThreadExecutor(Runtime.getRuntime()
.availableProcessors() * 4, 60,
new LinkedBlockingQueue<Runnable>());
results = new ArrayList<Result>();
log = new ArrayList<TaskInfo>();
executing = new HashSet<GraphTask>();
}

public List<Result> execute(Request request, List<GraphTask> tasks) {
System.out.println("Preparing task runner. Num Tasks: " + tasks.size());
graph = new GraphTaskBuilder(tasks).buildGraph();
processTasks();
awaitCompletion();
return results;
}

private void awaitCompletion() {
try {
executor.awaitTermination(3, TimeUnit.DAYS);
System.out.println("Results " + results.toString());
} catch (InterruptedException e) {
e.printStackTrace();
}
}

private void processTasks() {
if (graph.vertexSet().size() == 0) {
executor.shutdown();
System.out
.println("All tasks completed... shutting down executor service");
} else {
synchronized (graph) {
Iterator<GraphTask> iter = new TopologicalOrderIterator<GraphTask, DefaultEdge>(
graph);
while (iter.hasNext()) {
GraphTask task = iter.next();
if (graph.incomingEdgesOf(task).size() == 0
&& !executing.contains(task)) {
executor.execute(task);
executing.add(task);
}
}
}
}
}

private void completed(GraphTask t) {
System.out.println("Completed Task: " + t.getName());
synchronized (graph) {
for (DefaultEdge edge : graph.outgoingEdgesOf(t)) {
GraphTask target = graph.getEdgeTarget(edge);
target.addData(t.getData());
}
if (t.isEndPoint())
results.add(t.getResult());
graph.removeVertex(t);
executing.remove(t);
}
processTasks();
}

private class ThreadExecutor extends ThreadPoolExecutor {
public ThreadExecutor(int corePoolSize, long keepAliveSeconds,
BlockingQueue<Runnable> workQueue) {
super(corePoolSize, corePoolSize, keepAliveSeconds,
TimeUnit.SECONDS, workQueue);
}

@Override
protected void beforeExecute(Thread thread, Runnable runTask) {
super.beforeExecute(thread, runTask);
}

@Override
protected void afterExecute(Runnable runTask, Throwable e) {
super.afterExecute(runTask, e);
completed((GraphTask) runTask);
}

}

public static void main(String arg[]) throws Exception {
GraphTaskExecutor graphTaskExecutor = new GraphTaskExecutor();
TaskContext context = new TaskContext();
List<GraphTask> tasks = new ArrayList<GraphTask>();
Request request = new Request(1);

Set<DataType> empty = new HashSet<DataType>();
Set<DataType> producer = new HashSet<DataType>(Arrays.asList(
DataType.ACCT_INFO, DataType.PROJECTIONS));
Set<DataType> consumer = new HashSet<DataType>(Arrays.asList(
DataType.ACCT_INFO, DataType.PROJECTIONS));
Set<DataType> accountResult = new HashSet<DataType>(
Arrays.asList(DataType.ACCT_INFO));
Set<DataType> projectionResult = new HashSet<DataType>(
Arrays.asList(DataType.PROJECTIONS));
Set<DataType> intraDayResult = new HashSet<DataType>(
Arrays.asList(DataType.PROJECTIONS));

tasks.add(new GraphTask(context, "1", "A", producer, empty, empty));
tasks.add(new GraphTask(context, "2", "X", producer, consumer, empty,
"A"));
tasks.add(new GraphTask(context, "3", "Y", producer, consumer,
accountResult, "A"));
tasks.add(new GraphTask(context, "4", "B", producer, consumer, empty,
"A"));
tasks.add(new GraphTask(context, "5", "C", producer, consumer, empty,
"B"));
tasks.add(new GraphTask(context, "6", "D", producer, consumer,
intraDayResult, "C"));
tasks.add(new GraphTask(context, "7", "E", producer, consumer,
projectionResult, "D", "X", "Y"));

graphTaskExecutor.execute(request, tasks);
System.out.println("All DONE");

}

}


import java.util.Arrays;
import java.util.Set;
import java.util.TreeSet;

public class GraphTask extends AbstractTask {

private Set<String> dependencies = new TreeSet<String>();

public GraphTask(TaskContext context, String id, String name,
Set<DataType> produces, Set<DataType> consumes,
Set<DataType> endpoints, String... dependency) {
super(id, name, context, produces, consumes, endpoints);
dependencies.addAll(Arrays.asList(dependency));
}

public GraphTask(TaskContext context, String id, String name,
Set<DataType> produces, Set<DataType> consumes,
Set<DataType> endpoints) {
super(id, name, context, produces, consumes, endpoints);
}

public void addDependency(String dependency) {
this.dependencies.add(dependency);
}

public Data process(TaskContext context, Data data) throws TaskException {
int time = (int) (Math.random() * 10);
System.out.println("Task " + getName() + " estimated to run for "
+ time + " secs");
TaskResult result = null;
try {
Thread.sleep(time * 1000);
} catch (InterruptedException e) {
e.printStackTrace();
}
result = new TaskResult(getName());
for (DataType d: getProduces()) {
result.addData(d, d.toString());
}
return result;
}

public Set<String> getDependencies() {
return dependencies;
}

public void setDependencies(Set<String> dependencies) {
this.dependencies = dependencies;
}

}

import java.time.LocalTime;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

/**
 * Base class for tasks: merges the data handed over by predecessor tasks,
 * validates it against the declared {@code consumes} set, delegates the actual
 * work to {@link #process}, and extracts the {@code endpoints} data into a
 * {@link Result}.
 *
 * <p>{@link #run()} records start/end time and the outcome status so that
 * {@link #getTaskInfo()} reflects what actually happened.
 */
public abstract class AbstractTask implements Task<Data, Result> {

    private String identifier;
    private String name;
    private TaskContext context;
    /** Data handed over by predecessor tasks via {@link #addData}. */
    private List<Data> prevData;
    /** Output of this task (or a SKIPPED/ERROR marker after a failed run). */
    private Data data;
    private Set<DataType> produces;
    private Set<DataType> consumes;
    private Set<DataType> endpoints;
    private TaskStatus status;
    private LocalTime startTime;
    private LocalTime endTime;
    private Result result;

    public AbstractTask(String id, String name, TaskContext context,
            Set<DataType> produces, Set<DataType> consumes,
            Set<DataType> endpoints) {
        this.identifier = id;
        this.name = name;
        this.context = context;
        this.consumes = consumes;
        this.produces = produces;
        this.endpoints = endpoints;
        this.data = new Data();
        this.prevData = new ArrayList<Data>();
        this.status = TaskStatus.SUCCESS;
    }

    /** Creates a task that produces, consumes and exposes nothing. */
    public AbstractTask(String id, String name, TaskContext context) {
        this(id, name, context, new HashSet<DataType>(),
                new HashSet<DataType>(), new HashSet<DataType>());
    }

    public String getIdentifier() {
        return identifier;
    }

    public void setIdentifier(String identifier) {
        this.identifier = identifier;
    }

    public String getName() {
        return name;
    }

    public void setName(String name) {
        this.name = name;
    }

    public TaskContext getContext() {
        return context;
    }

    public void setContext(TaskContext context) {
        this.context = context;
    }

    /** @return {@code true} if this task declares at least one end-point data type */
    public boolean isEndPoint() {
        return !endpoints.isEmpty();
    }

    /**
     * Merges all predecessor data into one {@link Data} and checks that this task
     * may run.
     *
     * @return the merged input data
     * @throws MissingDataException if a consumed data type is absent from the merged input
     * @throws SkippedTaskException if a predecessor was skipped (and none failed)
     * @throws ErrorTaskException   if a predecessor failed
     */
    private Data preProcess() throws MissingDataException,
            SkippedTaskException, ErrorTaskException {
        Map<TaskStatus, Data> statusData = new HashMap<TaskStatus, Data>();
        Map<DataType, Object> allData = new HashMap<DataType, Object>();

        // Get all data from previous results
        for (Data r : prevData) {
            statusData.put(r.getStatus(), r);
            allData.putAll(r.getObjects());
        }

        Data merged = new Data();
        merged.addData(allData);
        merged.setStatus(deriveTaskStatus(statusData));

        switch (merged.getStatus()) {
        case SUCCESS:
            for (DataType d : consumes) {
                if (!allData.containsKey(d)) {
                    throw new MissingDataException("Task " + name
                            + " Missing input data for " + d);
                }
            }
            break;
        case SKIPPED:
            throw new SkippedTaskException("Previous Task was skipped");
        case ERROR:
            throw new ErrorTaskException("Previous Task failed");
        }
        return merged;
    }

    /** Picks the worst predecessor status: ERROR over SKIPPED over SUCCESS. */
    private TaskStatus deriveTaskStatus(Map<TaskStatus, Data> statusData) {
        if (statusData.containsKey(TaskStatus.ERROR)) {
            return TaskStatus.ERROR;
        }
        if (statusData.containsKey(TaskStatus.SKIPPED)) {
            return TaskStatus.SKIPPED;
        }
        return TaskStatus.SUCCESS;
    }

    /**
     * Copies every end-point data type from {@code outputData} into a new result.
     *
     * @throws MissingDataException if the output lacks a declared end-point data type
     */
    private Result postProcess(Data outputData) throws MissingDataException {
        Result extracted = new Result();
        for (DataType d : endpoints) {
            if (!outputData.getObjects().containsKey(d)) {
                throw new MissingDataException("Missing end point data for " + d);
            }
            extracted.addData(d, outputData.getObject(d));
        }
        return extracted;
    }

    /**
     * Runs the pre-process / process / post-process pipeline. Failures are not
     * propagated for the checked task exceptions; instead {@code data} is replaced
     * with a SKIPPED or ERROR marker so that successors can react to it.
     */
    @Override
    public void run() {
        System.out.println("Running task: " + name);
        startTime = LocalTime.now();
        try {
            Data inputData = preProcess();
            data = process(context, inputData);
            result = postProcess(data);
            status = TaskStatus.SUCCESS;
        } catch (MissingDataException | SkippedTaskException
                | ErrorTaskException e) {
            // Keep the task status in step with the status carried by the data.
            status = TaskStatus.SKIPPED;
            data = new Data(TaskStatus.SKIPPED, new Error("SKIP_TASK",
                    "Skip Task", e));
            e.printStackTrace();
        } catch (TaskException e) {
            status = TaskStatus.ERROR;
            data = new Data(TaskStatus.ERROR, new Error("PREV_ERROR",
                    "Error in dependent task", e));
            e.printStackTrace();
        } finally {
            // Recorded even when an unchecked exception escapes.
            endTime = LocalTime.now();
        }
    }

    /**
     * Performs the task's actual work.
     *
     * @param context the task context passed at construction
     * @param data    merged data from predecessor tasks
     * @return the data produced by this task
     * @throws TaskException if the work fails
     */
    public abstract Data process(TaskContext context, Data data)
            throws TaskException;

    /** Registers data from a predecessor task as input for this task. */
    @Override
    public void addData(Data data) {
        this.prevData.add(data);
    }

    public TaskStatus getStatus() {
        return status;
    }

    public void setStatus(TaskStatus status) {
        this.status = status;
    }

    public Result getResult() {
        return result;
    }

    public Set<DataType> getProduces() {
        return produces;
    }

    public void setProduces(Set<DataType> produces) {
        this.produces = produces;
    }

    public Set<DataType> getConsumes() {
        return consumes;
    }

    public void setConsumes(Set<DataType> consumes) {
        this.consumes = consumes;
    }

    public Data getData() {
        return data;
    }

    @Override
    public String toString() {
        return "[" + name + "]";
    }

    /** @return a snapshot of this task's identity, status and recorded run times */
    @Override
    public TaskInfo getTaskInfo() {
        return new TaskInfo(this.identifier, this.name, this.status,
                this.startTime, this.endTime);
    }

    public Set<DataType> getEndpoints() {
        return endpoints;
    }

    public void setEndpoints(Set<DataType> endpoints) {
        this.endpoints = endpoints;
    }

}

最佳答案

Rx-Java 在这里会是一个不错的选择,因为它正是为构建复杂的消息流、在应用程序内部对并行执行进行 fork 和 join 而设计的(而且只需多引入一个编译依赖)。CPU 密集型任务也许用简单的回调就能解决,而 IO 密集型任务则需要异步 IO 支持(您也可以借助 rx-java 自己实现)。

Akka 和 Vert.x 是构建应用程序的完整框架,而 Rx-Java 只是一个将异步函数式编程的优势引入应用程序的库。

关于java - 响应式(Reactive)扩展和/或参与者模型如何简化流程编排?,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/27746842/

26 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com