memcpy上未指定的启动失败在库达运行我的程序时,我遇到了一个“未指定的启动失败”。我检查过错误了。这个程序是一个微分方程的求解器。它迭代总ITER次数。房间X和房间_Y是矩阵的宽度和高度。这里是标题,它的名称是“solate:#define ITER_BETWEEN_SAVES 10000#define TOTAL_ITER 10000#define ROOM_X 2048#define ROOM_Y 2048#define SOURCE_DIM_X 200#define SOURCE_DIM_Y 1000#define ALPHA 1.11e-4#define DELTA_T 10#define H 0.1#include <stdio.h>void Matrix(float* M);void SolverCPU(float* M1, float* M2);__global__ void SolverGPU(float* M1, float* M2);下面是填充矩阵的内核和函数:#include "solver.h"#include<cuda.h>void Matrix(float* M){
for (int j = 0; j < SOURCE_DIM_Y; ++j) {
for (int i = 0; i < SOURCE_DIM_X; ++i) {
M[(i+(ROOM_X/2 - SOURCE_DIM_X/2)) + ROOM_X * (j+(ROOM_Y/2 - SOURCE_DIM_Y/2))] = 100;
}
}}
__global__ void SolverGPU(float* M1,float *M2) {
int i =threadIdx.x + blockIdx.x * blockDim.x;
int j = threadIdx.y + blockIdx.y * blockDim.y;
float M1_Index = M1[i + ROOM_X * j];
float M1_IndexUp = M1[i+1 + ROOM_X * j];
float M1_IndexDown =M1[i-1 + ROOM_X * j];
float M1_IndexLeft = M1[i + ROOM_X * (j+1)];
float M1_IndexRight = M1[i + ROOM_X *(j-1)];
M2[i + ROOM_X * j] = M1_Index + (ALPHA * DELTA_T / (H*H)) * (M1_IndexUp + M1_IndexDown + M1_IndexLeft +M1_IndexRight - 4*M1_Index); }编译没有问题。当我检查我的错误时,“未指定的启动失败”出现在内核后面的memcpy上。好的,所以我读过它通常是因为内核不能正常运行。但是我找不到内核中的错误.我想这是一个很简单的错误,但是找不到它。
添加回答
举报
0/150
提交
取消