I would like to use streams to execute, in parallel, kernels that work on separate device data arrays. The data are allocated on the device and filled in by previous kernels.
I have written the following program, which shows that so far I cannot reach my goal: the kernels on the two non-default streams execute sequentially in their respective streams.
The same behavior is observed on 2 Intel machines running the latest Debian Linux. One has a Tesla C2075 with CUDA 4.2, the other a GeForce 460GT with CUDA 5.0. The Visual Profiler shows sequential execution in both the 4.2 and the 5.0 CUDA versions.
The code follows:
#include <iostream>
#include <stdio.h>
#include <ctime>
#include <curand.h>
using namespace std;
// compile and run this way:
// nvcc cuStreamsBasics.cu -arch=sm_20 -o testCuStream -lcuda -lcufft -lcurand
// testCuStream 1024 512 512
/* -------------------------------------------------------------------------- */
// "useful" macros
/* -------------------------------------------------------------------------- */
#define MSG_ASSERT( CONDITION, MSG )                                              \
    if (! (CONDITION))                                                            \
    {                                                                             \
        std::cerr << std::endl << "Dynamic assertion `" #CONDITION "` failed in " \
                  << __FILE__ << " line " << __LINE__ << ": <" << MSG << ">"      \
                  << std::endl;                                                   \
        exit( 1 );                                                                \
    }

#define ASSERT( CONDITION ) \
    MSG_ASSERT( CONDITION, " " )
// allocate data on the GPU memory, unpinned
#define CUDALLOC_GPU( _TAB, _DIM, _DATATYPE )                    \
    MSG_ASSERT(                                                  \
        cudaMalloc( (void**) &_TAB, _DIM * sizeof( _DATATYPE ) ) \
        == cudaSuccess, "failed CUDALLOC" );
/* -------------------------------------------------------------------------- */
// the CUDA kernels
/* -------------------------------------------------------------------------- */
// finds index in 1D array from sequential blocks
#define CUDAINDEX_1D                              \
    blockIdx.y * ( gridDim.x * blockDim.x ) +     \
    blockIdx.x * blockDim.x +                     \
    threadIdx.x;

__global__ void
kernel_diva(float* data, float value, int array_size)
{
    int i = CUDAINDEX_1D
    if (i < array_size)
        data[i] /= value;
}
__global__ void
kernel_jokea(float* data, float value, int array_size)
{
    int i = CUDAINDEX_1D
    if (i < array_size)
        data[i] *= value + sin( double(i) ) * 1 / cos( double(i) );
}
/* -------------------------------------------------------------------------- */
// usage
/* -------------------------------------------------------------------------- */
static void
usage(int argc, char **argv)
{
    if ((argc - 1) != 3)
    {
        printf("Usage: %s <dimx> <dimy> <dimz>\n", argv[0]);
        printf("do stuff\n");
        exit(1);
    }
}
/* -------------------------------------------------------------------------- */
// main program, finally!
/* -------------------------------------------------------------------------- */
int
main(int argc, char** argv)
{
    usage(argc, argv);
    size_t x_dim = atoi( argv[1] );
    size_t y_dim = atoi( argv[2] );
    size_t z_dim = atoi( argv[3] );

    cudaStream_t stream1, stream2;
    ASSERT( cudaStreamCreate( &stream1 ) == cudaSuccess );
    ASSERT( cudaStreamCreate( &stream2 ) == cudaSuccess );

    size_t size = x_dim * y_dim * z_dim;
    float *data1, *data2;
    CUDALLOC_GPU( data1, size, float );
    CUDALLOC_GPU( data2, size, float );

    curandGenerator_t gen;
    curandCreateGenerator(&gen, CURAND_RNG_PSEUDO_DEFAULT);
    /* Set seed */
    curandSetPseudoRandomGeneratorSeed(gen, 1234ULL);
    /* Generate n floats on device */
    curandGenerateUniform(gen, data1, size);
    curandGenerateUniform(gen, data2, size);

    dim3 dimBlock( z_dim, 1, 1 );
    dim3 dimGrid( x_dim, y_dim, 1 );

    clock_t start;
    double diff;

    cudaDeviceSynchronize();
    start = clock();
    kernel_diva <<< dimGrid, dimBlock >>>( data1, 5.55f, size );
    kernel_jokea<<< dimGrid, dimBlock >>>( data1, 5.55f, size );
    kernel_diva <<< dimGrid, dimBlock >>>( data2, 5.55f, size );
    kernel_jokea<<< dimGrid, dimBlock >>>( data2, 5.55f, size );
    cudaDeviceSynchronize();
    diff = ( std::clock() - start ) / (double)CLOCKS_PER_SEC;
    cout << endl << "sequential: " << diff;

    cudaDeviceSynchronize();
    start = clock();
    kernel_diva <<< dimGrid, dimBlock, 0, stream1 >>>( data1, 5.55f, size );
    kernel_diva <<< dimGrid, dimBlock, 0, stream2 >>>( data2, 5.55f, size );
    kernel_jokea<<< dimGrid, dimBlock, 0, stream1 >>>( data1, 5.55f, size );
    kernel_jokea<<< dimGrid, dimBlock, 0, stream2 >>>( data2, 5.55f, size );
    cudaDeviceSynchronize();
    diff = ( std::clock() - start ) / (double)CLOCKS_PER_SEC;
    cout << endl << "parallel: " << diff;

    cudaStreamDestroy( stream1 );
    cudaStreamDestroy( stream2 );
    return 0;
}
The dimensions of the arrays are typically 512^3 single-precision floats. I usually just split the arrays into blocks of (512,1,1) threads, placed on a grid of size (1<<15, (rest), 1).
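For concreteness, the arithmetic behind that configuration looks like this (a minimal sketch; the variable names are illustrative only):

// Launch configuration described above: a 512^3 float array split into
// (512,1,1)-thread blocks laid out on a (1<<15, rest, 1) grid.
size_t size = 512 * 512 * 512;                      // 134217728 elements
dim3 dimBlock( 512, 1, 1 );                         // 512 threads per block
size_t num_blocks = size / dimBlock.x;              // 262144 blocks in total
dim3 dimGrid( 1 << 15, num_blocks / (1 << 15), 1 ); // "rest" = 262144 / 32768 = 8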
Thanks in advance for any hint or comment.
Best regards.
Best answer
I will try to explain why you do not see overlap between the executions of your two kernels. To this end, I have constructed the code reported below, which uses your two kernels and monitors which Streaming Multiprocessor (SM) each block runs on. I am using CUDA 6.5 (Release Candidate) and running on a GT540M card, which has only 2 SMs, so it provides a simple playground to work with. The blockSize choice is delegated to the new CUDA 6.5 cudaOccupancyMaxPotentialBlockSize facility.
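For reference, this is roughly how that occupancy helper is called (a minimal sketch; my_kernel is a placeholder, not one of the kernels below):

int minGridSize, blockSize;
// Ask the runtime for the block size that maximizes occupancy for my_kernel;
// 0 means no dynamic shared memory, N caps the block size.
cudaOccupancyMaxPotentialBlockSize(&minGridSize, &blockSize, my_kernel, 0, N);
int gridSize = (N + blockSize - 1) / blockSize;   // round up to cover all N elements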
The code:
#include <stdio.h>
#include <time.h>
//#define DEBUG_MODE
/********************/
/* CUDA ERROR CHECK */
/********************/
#define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); }
inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort=true)
{
    if (code != cudaSuccess)
    {
        fprintf(stderr, "GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line);
        if (abort) exit(code);
    }
}
/**************************************************/
/* STREAMING MULTIPROCESSOR IDENTIFICATION NUMBER */
/**************************************************/
__device__ unsigned int get_smid(void) {
    unsigned int ret;
    asm("mov.u32 %0, %%smid;" : "=r"(ret));
    return ret;
}
/************/
/* KERNEL 1 */
/************/
__global__ void kernel_1(float * __restrict__ data, const float value, int *sm, int N)
{
    int i = threadIdx.x + blockIdx.x * blockDim.x;
    if (i < N) {
        data[i] = data[i] / value;
        if (threadIdx.x == 0) sm[blockIdx.x] = get_smid();
    }
}
//__global__ void kernel_1(float* data, float value, int N)
//{
//    int start = blockIdx.x * blockDim.x + threadIdx.x;
//    for (int i = start; i < N; i += blockDim.x * gridDim.x)
//    {
//        data[i] = data[i] / value;
//    }
//}
/************/
/* KERNEL 2 */
/************/
__global__ void kernel_2(float * __restrict__ data, const float value, int *sm, int N)
{
    int i = threadIdx.x + blockIdx.x * blockDim.x;
    if (i < N) {
        data[i] = data[i] * (value + sin(double(i)) * 1./cos(double(i)));
        if (threadIdx.x == 0) sm[blockIdx.x] = get_smid();
    }
}

//__global__ void kernel_2(float* data, float value, int N)
//{
//    int start = blockIdx.x * blockDim.x + threadIdx.x;
//    for (int i = start; i < N; i += blockDim.x * gridDim.x)
//    {
//        data[i] = data[i] * (value + sin(double(i)) * 1./cos(double(i)));
//    }
//}
/********/
/* MAIN */
/********/
int main()
{
    const int   N       = 10000;
    const float value   = 5.55f;
    const int   rep_num = 20;

    // --- CPU memory allocations
    float *h_data1     = (float*) malloc(N*sizeof(float));
    float *h_data2     = (float*) malloc(N*sizeof(float));
    float *h_data1_ref = (float*) malloc(N*sizeof(float));
    float *h_data2_ref = (float*) malloc(N*sizeof(float));

    // --- CPU data initializations
    srand(time(NULL));
    for (int i = 0; i < N; i++) {
        h_data1[i] = rand() / (float)RAND_MAX;
        h_data2[i] = rand() / (float)RAND_MAX;
    }

    // --- GPU memory allocations
    float *d_data1, *d_data2;
    gpuErrchk(cudaMalloc((void**)&d_data1, N*sizeof(float)));
    gpuErrchk(cudaMalloc((void**)&d_data2, N*sizeof(float)));

    // --- CPU -> GPU memory transfers
    gpuErrchk(cudaMemcpy(d_data1, h_data1, N*sizeof(float), cudaMemcpyHostToDevice));
    gpuErrchk(cudaMemcpy(d_data2, h_data2, N*sizeof(float), cudaMemcpyHostToDevice));

    // --- Reference results computed on the CPU
    for (int i = 0; i < N; i++) {
        h_data1_ref[i] = h_data1[i] / value;
        h_data2_ref[i] = h_data2[i] * (value + sin(double(i)) * 1./cos(double(i)));
    }
    // --- Stream creations
    cudaStream_t stream1, stream2;
    gpuErrchk(cudaStreamCreate(&stream1));
    gpuErrchk(cudaStreamCreate(&stream2));

    // --- Launch parameters configuration
    int blockSize1, blockSize2, minGridSize1, minGridSize2, gridSize1, gridSize2;
    cudaOccupancyMaxPotentialBlockSize(&minGridSize1, &blockSize1, kernel_1, 0, N);
    cudaOccupancyMaxPotentialBlockSize(&minGridSize2, &blockSize2, kernel_2, 0, N);
    gridSize1 = (N + blockSize1 - 1) / blockSize1;
    gridSize2 = (N + blockSize2 - 1) / blockSize2;

    // --- Allocating space for SM IDs
    int *h_sm_11 = (int*) malloc(gridSize1*sizeof(int));
    int *h_sm_12 = (int*) malloc(gridSize1*sizeof(int));
    int *h_sm_21 = (int*) malloc(gridSize2*sizeof(int));
    int *h_sm_22 = (int*) malloc(gridSize2*sizeof(int));
    int *d_sm_11, *d_sm_12, *d_sm_21, *d_sm_22;
    gpuErrchk(cudaMalloc((void**)&d_sm_11, gridSize1*sizeof(int)));
    gpuErrchk(cudaMalloc((void**)&d_sm_12, gridSize1*sizeof(int)));
    gpuErrchk(cudaMalloc((void**)&d_sm_21, gridSize2*sizeof(int)));
    gpuErrchk(cudaMalloc((void**)&d_sm_22, gridSize2*sizeof(int)));

    // --- Timing individual kernels
    float time;
    cudaEvent_t start, stop;
    cudaEventCreate(&start);
    cudaEventCreate(&stop);

    cudaEventRecord(start, 0);
    for (int i = 0; i < rep_num; i++) kernel_1<<<gridSize1, blockSize1>>>(d_data1, value, d_sm_11, N);
    cudaEventRecord(stop, 0);
    cudaEventSynchronize(stop);
    cudaEventElapsedTime(&time, start, stop);
    printf("Kernel 1 - elapsed time: %3.3f ms \n", time/rep_num);

    cudaEventRecord(start, 0);
    for (int i = 0; i < rep_num; i++) kernel_2<<<gridSize2, blockSize2>>>(d_data1, value, d_sm_21, N);
    cudaEventRecord(stop, 0);
    cudaEventSynchronize(stop);
    cudaEventElapsedTime(&time, start, stop);
    printf("Kernel 2 - elapsed time: %3.3f ms \n", time/rep_num);
    // --- No stream case
    cudaEventRecord(start, 0);
    kernel_1<<<gridSize1, blockSize1>>>(d_data1, value, d_sm_11, N);
#ifdef DEBUG_MODE
    gpuErrchk(cudaPeekAtLastError());
    gpuErrchk(cudaDeviceSynchronize());
    gpuErrchk(cudaMemcpy(h_data1, d_data1, N*sizeof(float), cudaMemcpyDeviceToHost));
    // --- Results check
    for (int i = 0; i < N; i++) {
        if (h_data1[i] != h_data1_ref[i]) {
            printf("Kernel1 - Error at i = %i; Host = %f; Device = %f\n", i, h_data1_ref[i], h_data1[i]);
            return 1;
        }
    }
#endif
    kernel_2<<<gridSize2, blockSize2>>>(d_data1, value, d_sm_21, N);
#ifdef DEBUG_MODE
    gpuErrchk(cudaPeekAtLastError());
    gpuErrchk(cudaDeviceSynchronize());
#endif
    kernel_1<<<gridSize1, blockSize1>>>(d_data2, value, d_sm_12, N);
#ifdef DEBUG_MODE
    gpuErrchk(cudaPeekAtLastError());
    gpuErrchk(cudaDeviceSynchronize());
    gpuErrchk(cudaMemcpy(d_data2, h_data2, N*sizeof(float), cudaMemcpyHostToDevice));
#endif
    kernel_2<<<gridSize2, blockSize2>>>(d_data2, value, d_sm_22, N);
#ifdef DEBUG_MODE
    gpuErrchk(cudaPeekAtLastError());
    gpuErrchk(cudaDeviceSynchronize());
    gpuErrchk(cudaMemcpy(h_data2, d_data2, N*sizeof(float), cudaMemcpyDeviceToHost));
    for (int i = 0; i < N; i++) {
        if (h_data2[i] != h_data2_ref[i]) {
            printf("Kernel2 - Error at i = %i; Host = %f; Device = %f\n", i, h_data2_ref[i], h_data2[i]);
            return 1;
        }
    }
#endif
    cudaEventRecord(stop, 0);
    cudaEventSynchronize(stop);
    cudaEventElapsedTime(&time, start, stop);
    printf("No stream - elapsed time: %3.3f ms \n", time);
    // --- Stream case
    cudaEventRecord(start, 0);
    kernel_1<<<gridSize1, blockSize1, 0, stream1>>>(d_data1, value, d_sm_11, N);
#ifdef DEBUG_MODE
    gpuErrchk(cudaPeekAtLastError());
    gpuErrchk(cudaDeviceSynchronize());
#endif
    kernel_1<<<gridSize1, blockSize1, 0, stream2>>>(d_data2, value, d_sm_12, N);
#ifdef DEBUG_MODE
    gpuErrchk(cudaPeekAtLastError());
    gpuErrchk(cudaDeviceSynchronize());
#endif
    kernel_2<<<gridSize2, blockSize2, 0, stream1>>>(d_data1, value, d_sm_21, N);
#ifdef DEBUG_MODE
    gpuErrchk(cudaPeekAtLastError());
    gpuErrchk(cudaDeviceSynchronize());
#endif
    kernel_2<<<gridSize2, blockSize2, 0, stream2>>>(d_data2, value, d_sm_22, N);
#ifdef DEBUG_MODE
    gpuErrchk(cudaPeekAtLastError());
    gpuErrchk(cudaDeviceSynchronize());
#endif
    cudaEventRecord(stop, 0);
    cudaEventSynchronize(stop);
    cudaEventElapsedTime(&time, start, stop);
    printf("Stream - elapsed time: %3.3f ms \n", time);

    cudaStreamDestroy(stream1);
    cudaStreamDestroy(stream2);

    printf("Test passed!\n");

    gpuErrchk(cudaMemcpy(h_sm_11, d_sm_11, gridSize1*sizeof(int), cudaMemcpyDeviceToHost));
    gpuErrchk(cudaMemcpy(h_sm_12, d_sm_12, gridSize1*sizeof(int), cudaMemcpyDeviceToHost));
    gpuErrchk(cudaMemcpy(h_sm_21, d_sm_21, gridSize2*sizeof(int), cudaMemcpyDeviceToHost));
    gpuErrchk(cudaMemcpy(h_sm_22, d_sm_22, gridSize2*sizeof(int), cudaMemcpyDeviceToHost));

    printf("Kernel 1: gridSize = %i; blockSize = %i\n", gridSize1, blockSize1);
    printf("Kernel 2: gridSize = %i; blockSize = %i\n", gridSize2, blockSize2);
    for (int i = 0; i < gridSize1; i++) {
        printf("Kernel 1 - Data 1: blockNumber = %i; SMID = %d\n", i, h_sm_11[i]);
        printf("Kernel 1 - Data 2: blockNumber = %i; SMID = %d\n", i, h_sm_12[i]);
    }
    for (int i = 0; i < gridSize2; i++) {
        printf("Kernel 2 - Data 1: blockNumber = %i; SMID = %d\n", i, h_sm_21[i]);
        printf("Kernel 2 - Data 2: blockNumber = %i; SMID = %d\n", i, h_sm_22[i]);
    }

    cudaDeviceReset();
    return 0;
}
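As a side note, whether a given device is able to overlap kernels at all can be queried from its properties; a minimal sketch (device 0 assumed):

cudaDeviceProp prop;
gpuErrchk(cudaGetDeviceProperties(&prop, 0));
printf("concurrentKernels = %d; multiProcessorCount = %d\n",
       prop.concurrentKernels, prop.multiProcessorCount);
// If concurrentKernels is 0, kernels from different streams can never
// overlap, whatever the launch configuration.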
Kernel timings for N = 100 and N = 10000:

N = 100
kernel_1    0.003 ms
kernel_2    0.005 ms

N = 10000
kernel_1    0.011 ms
kernel_2    0.053 ms

Hence, kernel 2 is computationally more expensive than kernel 1.
Results for N = 100
Kernel 1: gridSize = 1; blockSize = 100
Kernel 2: gridSize = 1; blockSize = 100
Kernel 1 - Data 1: blockNumber = 0; SMID = 0
Kernel 1 - Data 2: blockNumber = 0; SMID = 1
Kernel 2 - Data 1: blockNumber = 0; SMID = 0
Kernel 2 - Data 2: blockNumber = 0; SMID = 1
In this case, each kernel is launched with a single block only, and this is the timeline.
As you can see, overlap does occur. Looking at the above results, the scheduler delivers the single blocks of the two calls to kernel 1 in parallel to the two available SMs, and then does the same for kernel 2. This seems to be the main reason why the overlap occurs.
Results for N = 10000
Kernel 1: gridSize = 14; blockSize = 768
Kernel 2: gridSize = 10; blockSize = 1024
Kernel 1 - Data 1: blockNumber = 0; SMID = 0
Kernel 1 - Data 2: blockNumber = 0; SMID = 1
Kernel 1 - Data 1: blockNumber = 1; SMID = 1
Kernel 1 - Data 2: blockNumber = 1; SMID = 0
Kernel 1 - Data 1: blockNumber = 2; SMID = 0
Kernel 1 - Data 2: blockNumber = 2; SMID = 1
Kernel 1 - Data 1: blockNumber = 3; SMID = 1
Kernel 1 - Data 2: blockNumber = 3; SMID = 0
Kernel 1 - Data 1: blockNumber = 4; SMID = 0
Kernel 1 - Data 2: blockNumber = 4; SMID = 1
Kernel 1 - Data 1: blockNumber = 5; SMID = 1
Kernel 1 - Data 2: blockNumber = 5; SMID = 0
Kernel 1 - Data 1: blockNumber = 6; SMID = 0
Kernel 1 - Data 2: blockNumber = 6; SMID = 0
Kernel 1 - Data 1: blockNumber = 7; SMID = 1
Kernel 1 - Data 2: blockNumber = 7; SMID = 1
Kernel 1 - Data 1: blockNumber = 8; SMID = 0
Kernel 1 - Data 2: blockNumber = 8; SMID = 1
Kernel 1 - Data 1: blockNumber = 9; SMID = 1
Kernel 1 - Data 2: blockNumber = 9; SMID = 0
Kernel 1 - Data 1: blockNumber = 10; SMID = 0
Kernel 1 - Data 2: blockNumber = 10; SMID = 0
Kernel 1 - Data 1: blockNumber = 11; SMID = 1
Kernel 1 - Data 2: blockNumber = 11; SMID = 1
Kernel 1 - Data 1: blockNumber = 12; SMID = 0
Kernel 1 - Data 2: blockNumber = 12; SMID = 1
Kernel 1 - Data 1: blockNumber = 13; SMID = 1
Kernel 1 - Data 2: blockNumber = 13; SMID = 0
Kernel 2 - Data 1: blockNumber = 0; SMID = 0
Kernel 2 - Data 2: blockNumber = 0; SMID = 0
Kernel 2 - Data 1: blockNumber = 1; SMID = 1
Kernel 2 - Data 2: blockNumber = 1; SMID = 1
Kernel 2 - Data 1: blockNumber = 2; SMID = 1
Kernel 2 - Data 2: blockNumber = 2; SMID = 0
Kernel 2 - Data 1: blockNumber = 3; SMID = 0
Kernel 2 - Data 2: blockNumber = 3; SMID = 1
Kernel 2 - Data 1: blockNumber = 4; SMID = 1
Kernel 2 - Data 2: blockNumber = 4; SMID = 0
Kernel 2 - Data 1: blockNumber = 5; SMID = 0
Kernel 2 - Data 2: blockNumber = 5; SMID = 1
Kernel 2 - Data 1: blockNumber = 6; SMID = 1
Kernel 2 - Data 2: blockNumber = 6; SMID = 0
Kernel 2 - Data 1: blockNumber = 7; SMID = 0
Kernel 2 - Data 2: blockNumber = 7; SMID = 1
Kernel 2 - Data 1: blockNumber = 8; SMID = 1
Kernel 2 - Data 2: blockNumber = 8; SMID = 0
Kernel 2 - Data 1: blockNumber = 9; SMID = 0
Kernel 2 - Data 2: blockNumber = 9; SMID = 1
This is the timeline:
In this case, no overlap occurs. From the above results, this does not mean that the two SMs are not exploited simultaneously; rather (I believe), because of the larger number of blocks to be launched, assigning blocks of two different kernels or blocks of the same kernel makes little difference in performance, so the scheduler chooses the second option.
I have tested that the behavior stays the same when more work is done per thread.
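To make the overlap plainly visible, one can invert the situation: keep each launch so small that a single kernel leaves most of the GPU idle. The following is a minimal sketch under that assumption (the kernel and the sizes are illustrative, not taken from the code above):

#include <stdio.h>

// Artificially heavy kernel launched with a single block, so that one launch
// alone cannot fill the GPU and two streams have room to run side by side.
__global__ void busy_kernel(float *data, int iters)
{
    float x = data[threadIdx.x];
    for (int k = 0; k < iters; k++) x = sinf(x) + cosf(x);   // artificial load
    data[threadIdx.x] = x;
}

int main()
{
    float *d_a, *d_b;
    cudaMalloc((void**)&d_a, 256 * sizeof(float));
    cudaMalloc((void**)&d_b, 256 * sizeof(float));
    cudaMemset(d_a, 0, 256 * sizeof(float));
    cudaMemset(d_b, 0, 256 * sizeof(float));

    cudaStream_t s1, s2;
    cudaStreamCreate(&s1);
    cudaStreamCreate(&s2);

    // One block per launch: the two streams should overlap on the timeline.
    busy_kernel<<<1, 256, 0, s1>>>(d_a, 1 << 20);
    busy_kernel<<<1, 256, 0, s2>>>(d_b, 1 << 20);
    cudaDeviceSynchronize();

    cudaStreamDestroy(s1);
    cudaStreamDestroy(s2);
    cudaFree(d_a); cudaFree(d_b);
    return 0;
}

On a profiler timeline the two launches should then appear side by side, provided the device reports concurrentKernels = 1.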
Regarding "concurrency - CUDA streams and concurrent kernel execution", we found a similar question on Stack Overflow: https://stackoverflow.com/questions/17201473/