gpt4 book ai didi

c++ - 在 Debug模式下,所有程序运行良好,但在 Release模式下,我的 cudaMalloc 操作出现错误

转载 作者:行者123 更新时间:2023-11-30 20:03:28 25 4
gpt4 key购买 nike

在debug模式下程序运行良好,但在Release模式下memcpy操作出现错误

 // Stage CellList in a malloc'd host buffer, then copy it into a device buffer of the same size.
 CellBot *hCellList;
CellBot *dCellList;
size_t CellSize = WorldConst.numberOfCells * sizeof(CellBot);
// NOTE(review): the malloc call is an argument of assert(). When NDEBUG is defined
// (typical for Release builds) assert expands to nothing, so the allocation is never
// performed and hCellList is never assigned.
assert(!((hCellList = (CellBot *)malloc(CellSize)) == NULL));
gpuAssert(cudaMalloc((void**)&dCellList, CellSize));


// Writes CellSize bytes through hCellList; this is the line reported to crash in Release.
::memcpy(hCellList, CellList.data(), CellSize);
gpuAssert(cudaMemcpy(dCellList, hCellList, CellSize, cudaMemcpyHostToDevice));

我对此毫无头绪。我一直在 Debug 模式下开发,直到决定构建 Release 版本时才出现这个错误。程序停在这行代码上:

::memcpy(hCellList, CellList.data(), CellSize);

OgreCu_0.01.exe 中的 0x00007FFB9820C447 (vcruntime140.dll) 抛出异常:0xC0000005:访问冲突写入位置 0x0000000000000000。

Visual Studio 在 memcpy.asm 中标出了出错的那一行(下面用 ** 标记):

 CopyUp:
cmp r8, 128
jbe XmmCopySmall

bt __favor, __FAVOR_ENFSTRG ; check for ENFSTRG (enhanced fast strings)
jnc XmmCopyUp ; If Enhanced Fast String not available, use XMM

; use Enhanced Fast Strings
; but first align the destination dst to 16 byte alignment
mov rax, r11 ; return original destination pointer
mov r11, rdi ; save rdi in r11
mov rdi, rcx ; move destination pointer to rdi
mov rcx, r8 ; move length to rcx
mov r8, rsi ; save rsi in r8
mov rsi, r10 ; move source pointer to rsi
**rep movsb ; copy source to destination buffer**
mov rsi, r8 ; restore rsi
mov rdi, r11 ; restore rdi
ret

我把 ::memcpy(hCellList, CellList.data(), CellSize); 改成了:

// Element-wise copy of CellList into the host buffer, tried as a replacement for ::memcpy.
// It still writes through hCellList, so it depends on the same allocation having succeeded.
for (int e = 0; e < WorldConst.numberOfCells; e++)
{
hCellList[e] = CellList[e];
}

hCellList[e] = CellList[e]; 中出现同样的错误

CellBot的结构

// Per-cell record. Arrays of CellBot are sized with sizeof(CellBot) and moved with
// memcpy / cudaMemcpy in the snippet above, i.e. they are treated as raw bytes.
// Field meanings below are inferred from the names only -- TODO confirm with the author.
struct CellBot
{
// Identifiers (presumably a primary and a secondary id for the cell).
int mainId;
int subId;

// Vec3 is XVector3<float>: three floats x, y, z.
Vec3 coord;
Vec3 speed;
Vec3 nspeed;
Vec3 velocity;
Vec3 nvelocity;

// Interaction radii (units not shown in this snippet).
float radiusView;
float radiusAttraction;
float radiusRepulsion;

// Interaction force magnitudes (presumably paired with the radii above).
float forceAttraction;
float forceRepulsion;

// Physical properties of the cell.
float radius;
float mass;
float frictionBounce;

// Presumably indices into color / group tables defined elsewhere.
int colorId;
int groupId;

};

Vec3:

// Three-component vector (x, y, z) whose members are all callable from host and
// device code. Arithmetic operators are component-wise; the compound-assignment
// operators run VEC3_VALIDATE() on the result before returning it.
//
// VEC3_VALIDATE is a macro defined outside this snippet; what it checks is not
// visible here.
template <typename T=float>
class XVector3
{
public:

    typedef T value_type;

    // Zero vector.
    __host__ __device__ inline XVector3() : x(0.0f), y(0.0f), z(0.0f) {}
    // All three components set to the same value.
    __host__ __device__ inline XVector3(T a) : x(a), y(a), z(a) {}
    // Reads three consecutive values starting at p; p must point to at least 3 elements.
    __host__ __device__ inline XVector3(const T* p) : x(p[0]), y(p[1]), z(p[2]) {}
    // Explicit components; the only constructor that validates.
    __host__ __device__ inline XVector3(T x_, T y_, T z_) : x(x_), y(y_), z(z_)
    {
        VEC3_VALIDATE();
    }

    // Implicit conversion to a pointer to the first component (&x).
    __host__ __device__ inline operator T* () { return &x; }
    __host__ __device__ inline operator const T* () const { return &x; }

    // NOTE(review): validation runs before the assignment, so it presumably sees the
    // previous component values rather than the new ones -- confirm this is intended.
    __host__ __device__ inline void Set(T x_, T y_, T z_) { VEC3_VALIDATE(); x = x_; y = y_; z = z_;}

    // Binary operators: copy *this, apply the matching compound assignment (which
    // validates the result), and return the copy. The original had an unreachable
    // VEC3_VALIDATE() after each return statement; that dead code is removed.
    __host__ __device__ inline XVector3<T> operator * (T scale) const { XVector3<T> r(*this); r *= scale; return r; }
    __host__ __device__ inline XVector3<T> operator / (T scale) const { XVector3<T> r(*this); r /= scale; return r; }
    __host__ __device__ inline XVector3<T> operator + (const XVector3<T>& v) const { XVector3<T> r(*this); r += v; return r; }
    __host__ __device__ inline XVector3<T> operator - (const XVector3<T>& v) const { XVector3<T> r(*this); r -= v; return r; }
    __host__ __device__ inline XVector3<T> operator /(const XVector3<T>& v) const { XVector3<T> r(*this); r /= v; return r; }
    __host__ __device__ inline XVector3<T> operator *(const XVector3<T>& v) const { XVector3<T> r(*this); r *= v; return r; }

    // Compound assignments: update in place, validate, return *this.
    __host__ __device__ inline XVector3<T>& operator *=(T scale) {x *= scale; y *= scale; z*= scale; VEC3_VALIDATE(); return *this;}
    // Scalar division multiplies by the reciprocal, computed once.
    __host__ __device__ inline XVector3<T>& operator /=(T scale) {T s(1.0f/scale); x *= s; y *= s; z *= s; VEC3_VALIDATE(); return *this;}
    __host__ __device__ inline XVector3<T>& operator +=(const XVector3<T>& v) {x += v.x; y += v.y; z += v.z; VEC3_VALIDATE(); return *this;}
    __host__ __device__ inline XVector3<T>& operator -=(const XVector3<T>& v) {x -= v.x; y -= v.y; z -= v.z; VEC3_VALIDATE(); return *this;}
    __host__ __device__ inline XVector3<T>& operator /=(const XVector3<T>& v) {x /= v.x; y /= v.y; z /= v.z; VEC3_VALIDATE(); return *this; }
    __host__ __device__ inline XVector3<T>& operator *=(const XVector3<T>& v) {x *= v.x; y *= v.y; z *= v.z; VEC3_VALIDATE(); return *this; }

    // True when any component differs.
    __host__ __device__ inline bool operator != (const XVector3<T>& v) const { return (x != v.x || y != v.y || z != v.z); }

    // negate
    __host__ __device__ inline XVector3<T> operator -() const { VEC3_VALIDATE(); return XVector3<T>(-x, -y, -z); }

    // Runs VEC3_VALIDATE() on the current components.
    __host__ __device__ void Validate()
    {
        VEC3_VALIDATE();
    }

    T x,y,z;
};

typedef XVector3<float> Vec3;
typedef XVector3<float> Vector3;

// lhs scalar scale
// Scalar-on-the-left scaling: yields a copy of rhs with every component multiplied by lhs.
template <typename T>
__host__ __device__ XVector3<T> operator *(T lhs, const XVector3<T>& rhs)
{
    // The member operator*(T) performs the same copy-then-scale sequence.
    return rhs * lhs;
}

// Component-wise equality: true only when x, y and z all compare equal.
template <typename T>
__host__ __device__ bool operator==(const XVector3<T>& lhs, const XVector3<T>& rhs)
{
    // Expressed as "no component differs"; evaluates the same comparisons in the same order.
    const bool anyDiffers = (lhs.x != rhs.x) || (lhs.y != rhs.y) || (lhs.z != rhs.z);
    return !anyDiffers;
}

最佳答案

你的问题很难理解。下次请写出更完整的错误消息,并说明你想要做什么!

不过我猜你遇到的是下面这个问题:assert 语句会拖慢运行速度,因此通常只在 Debug 模式下才会被编译进代码;在 Release 模式下,它们通常会被直接忽略。

但是,在您的代码中,您在 assert 内使用了 malloc。因此,在调试版本中,您可以获得所需的内存,而在发布版本中,您什么也得不到,并且程序崩溃。该行是:

assert(!((hCellList = (CellBot *)malloc(CellSize)) == NULL));

你应该做的是:

hCellList = (CellBot *)malloc(CellSize);
assert(!(hCellList == NULL));

关于c++ - 在 Debug模式下,所有程序运行良好,但在 Release模式下,我的 cudaMalloc 操作出现错误,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/51560592/

25 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com