gpt4 book ai didi

c# - CUDA.NET 中的上下文迁移

转载 作者:行者123 更新时间:2023-11-30 12:18:09 28 4
gpt4 key购买 nike

我目前正在使用 GASS 的 CUDA.NET 库。我需要在一个 CPU 线程中初始化 cuda 数组(实际上是 cublas 向量,但这并不重要)并在其他 CPU 线程中使用它们。但是保存所有初始化数组和加载函数的 CUDA 上下文只能附加到一个 CPU 线程。

有一种称为上下文迁移 API 的机制可以从一个线程分离上下文并将其附加到另一个线程。但我不知道如何在 CUDA.NET 中正确使用它。

我试过这样的:

/// <summary>
/// Attempt to hand a CUDA context from the main thread to a worker thread:
/// the main thread creates the context and uploads a vector, then passes the
/// popped context handle to a new thread that pushes it and tries to use the
/// same device pointer.
/// </summary>
class Program
{
// Host-side buffers: vector1 is uploaded to the device, vector2 receives the download.
private static float[] vector1, vector2;
private static CUDA cuda;
private static CUBLAS cublas;

// Device allocation shared by both threads.
private static CUdeviceptr ptr;

/// <summary>
/// Creates the context, uploads the initial vector, releases the context
/// from this thread and starts the worker thread with the context handle.
/// </summary>
static void Main(string[] args)
{
cuda = new CUDA(false);
cublas = new CUBLAS(cuda);
cuda.Init();
// presumably 0 is the device ordinal -- TODO confirm against the CUDA.NET API
cuda.CreateContext(0);
AllocateVectors();
// NOTE(review): the context is detached BEFORE it is popped. In the CUDA driver API
// cuCtxDetach drops a usage count and may destroy the context; if these wrappers map
// onto it, the handle popped on the next line may already be invalid -- confirm.
cuda.DetachContext();
CUcontext context = cuda.PopCurrentContext();
// Main returns right after this call; the worker is a foreground thread
// (IsBackground = false), so the process stays alive until it finishes.
GetVectorFromDeviceAsync(context);
}

/// <summary>
/// Allocates device memory for vector1, copies vector1 into it and
/// creates the host buffer vector2 used later for the read-back.
/// </summary>
private static void AllocateVectors()
{
vector1 = new float[]{1f, 2f, 3f, 4f, 5f};
ptr = cublas.Allocate(vector1.Length, sizeof (float));
cublas.SetVector(vector1, ptr);

vector2 = new float[5];
}


/// <summary>
/// Worker-thread entry point: makes the received context current, modifies
/// vector1 on the host, uploads it and reads the device data back into vector2.
/// </summary>
/// <param name="objContext">Boxed CUcontext handed over by Thread.Start.</param>
private static void GetVectorFromDevice(object objContext)
{
CUcontext localContext = (CUcontext) objContext;
cuda.PushCurrentContext(localContext);
// NOTE(review): attach follows push on the same handle; whether both are
// needed (or valid together) depends on CUDA.NET semantics -- confirm.
cuda.AttachContext(localContext);

// Modify the host copy so the upload below is observable.
vector1[0] = -1;
// Upload the modified vector, then read it back into vector2.
cublas.SetVector(vector1, ptr);
cublas.GetVector(ptr, vector2);
// Pops through the raw driver binding instead of the cuda wrapper used in Main.
CUDADriver.cuCtxPopCurrent(ref localContext);
}

/// <summary>
/// Starts GetVectorFromDevice on a new foreground thread and returns
/// immediately without waiting for it.
/// </summary>
/// <param name="cUcontext">Context handle passed to the thread as its start argument.</param>
private static void GetVectorFromDeviceAsync(CUcontext cUcontext)
{
Thread thread = new Thread(GetVectorFromDevice);
thread.IsBackground = false;
thread.Start(cUcontext);
}
}

但在尝试将修改后的向量复制到设备时执行失败,原因应该是上下文没有被附加到新线程。不太可能是其他原因,因为同样的代码在单线程模式下工作正常。有什么办法可以让它正常工作吗?

最佳答案

我仍然没有找到这个问题的真正解决方案,但想出了一个变通办法。关键是在同一个 CPU 线程中执行所有与 CUDA 相关的操作。例如,您可以这样做:

/// <summary>
/// Demonstrates the workaround: every CUBLAS call is routed through
/// CudaManager.CallMethod, while the threads that request the work merely
/// signal the main thread when their request has completed.
/// </summary>
class Program
{
    // Signalled by each helper thread once its CudaManager call has returned.
    private static readonly AutoResetEvent autoResetEvent = new AutoResetEvent(false);

    private static CUDA cuda;
    private static CUBLAS cublas;

    // Device allocation holding the vector.
    private static CUdeviceptr ptr;

    // vector1 is the upload source, vector2 the download target.
    private static float[] vector1, vector2;

    /// <summary>
    /// Allocates the device vector, overwrites it from one thread, reads it
    /// back from another thread and prints the five values read.
    /// </summary>
    static void Main()
    {
        cuda = new CUDA(true);
        cublas = new CUBLAS(cuda);

        // Initial allocation and upload, requested from the main thread.
        CudaManager.CallMethod(AllocateVectors);

        // First helper thread: replace the device contents.
        StartAndAwaitSignal(ChangeVectorOnDevice_ThreadRun);

        // Second helper thread: download the device contents into vector2.
        StartAndAwaitSignal(GetVectorFromDevice_ThreadRun);

        Console.WriteLine("({0}, {1}, {2}, {3}, {4})", vector2[0], vector2[1], vector2[2], vector2[3], vector2[4]);

        Console.ReadKey(true);
    }

    /// <summary>
    /// Runs <paramref name="entryPoint"/> on a new foreground thread and blocks
    /// until that thread sets autoResetEvent.
    /// </summary>
    private static void StartAndAwaitSignal(ThreadStart entryPoint)
    {
        Thread worker = new Thread(entryPoint);
        worker.IsBackground = false;
        worker.Start();

        autoResetEvent.WaitOne();
    }

    /// <summary>
    /// Creates both host buffers, allocates device memory sized for vector1
    /// and copies vector1 into it.
    /// </summary>
    private static void AllocateVectors()
    {
        vector1 = new float[] { 1f, 2f, 3f, 4f, 5f };
        vector2 = new float[5];

        ptr = cublas.Allocate(vector1.Length, sizeof(float));
        cublas.SetVector(vector1, ptr);
    }

    /// <summary>Copies the device vector into vector2.</summary>
    private static void GetVectorFromDevice()
    {
        cublas.GetVector(ptr, vector2);
    }

    /// <summary>Replaces vector1 with negated values and uploads it to the device.</summary>
    private static void ChangeVectorOnDevice()
    {
        vector1 = new float[] { -1f, -2f, -3f, -4f, -5f };
        cublas.SetVector(vector1, ptr);
    }

    /// <summary>Thread body: requests the upload, then wakes the main thread.</summary>
    private static void ChangeVectorOnDevice_ThreadRun()
    {
        CudaManager.CallMethod(ChangeVectorOnDevice);
        autoResetEvent.Set();
    }

    /// <summary>Thread body: requests the download, then wakes the main thread.</summary>
    private static void GetVectorFromDevice_ThreadRun()
    {
        CudaManager.CallMethod(GetVectorFromDevice);
        autoResetEvent.Set();
    }
}

/// <summary>
/// Executes delegates one at a time on a single dedicated background thread,
/// so that all work submitted through <see cref="CallMethod"/> runs on the
/// same thread regardless of which thread submitted it.
/// </summary>
public static class CudaManager
{
    /// <summary>
    /// The delegate most recently submitted through <see cref="CallMethod"/>.
    /// </summary>
    public static Action WorkMethod { get; private set; }

    // Signalled by CallMethod when WorkMethod holds a new delegate to run.
    private static readonly AutoResetEvent actionReceived = new AutoResetEvent(false);
    // Signalled by the worker thread when the delegate has finished (or failed).
    private static readonly AutoResetEvent callbackEvent = new AutoResetEvent(false);

    // Serializes callers so only one delegate is in flight at a time.
    private static readonly object gate = new object();
    private static bool isCudaThreadRunning;

    // Exception thrown by the last delegate, handed from the worker to the caller.
    // Only touched between the two event signals, which order the accesses.
    private static Exception workFailure;

    /// <summary>
    /// Worker loop: waits for a delegate, runs it, records any exception it
    /// throws and always releases the waiting caller.
    /// </summary>
    private static void ThreadRun()
    {
        while (actionReceived.WaitOne())
        {
            try
            {
                WorkMethod.Invoke();
            }
            catch (Exception ex)
            {
                // Catch-all is deliberate: the exception is marshalled to the
                // caller instead of going unhandled on this worker thread.
                workFailure = ex;
            }
            finally
            {
                // Release the caller even on failure so it never waits forever.
                callbackEvent.Set();
            }
        }
    }

    /// <summary>Starts the worker thread when the class is first used.</summary>
    static CudaManager()
    {
        Run();
    }

    /// <summary>
    /// Starts the background worker thread if it has not been started yet.
    /// </summary>
    public static void Run()
    {
        if (!isCudaThreadRunning)
        {
            Thread thread = new Thread(ThreadRun);
            thread.IsBackground = true;
            thread.Start();
            isCudaThreadRunning = true;
        }
    }

    /// <summary>
    /// Runs <paramref name="method"/> on the worker thread and blocks the
    /// calling thread until it has completed.
    /// </summary>
    /// <param name="method">The work to execute on the worker thread.</param>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="method"/> is null.
    /// </exception>
    /// <remarks>
    /// Any exception thrown by <paramref name="method"/> is rethrown on the
    /// calling thread; the worker thread keeps running and accepts further calls.
    /// </remarks>
    public static void CallMethod(Action method)
    {
        if (method == null)
        {
            throw new ArgumentNullException("method");
        }

        Exception failure;
        lock (gate)
        {
            workFailure = null;
            WorkMethod = method;
            // Wake the worker loop.
            actionReceived.Set();
            // Block until the worker reports completion.
            callbackEvent.WaitOne();
            failure = workFailure;
            workFailure = null;
        }

        if (failure != null)
        {
            // Rethrow outside the lock, preserving the original stack trace.
            System.Runtime.ExceptionServices.ExceptionDispatchInfo.Capture(failure).Throw();
        }
    }
}

希望对大家有所帮助。

关于c# - CUDA.NET 中的上下文迁移,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/2711323/

28 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com